From 1ca08a7861f072c3ade518490f13916887917849 Mon Sep 17 00:00:00 2001 From: romnnn Date: Sun, 11 Feb 2024 03:41:10 +0100 Subject: [PATCH] refactor - split statistics code into separate files - output latex tables as pdfs and pngs --- .ignore | 2 + Pipfile | 2 + gpucachesim/benchmarks.py | 74 +- gpucachesim/microbench/deprecated.py | 16 +- gpucachesim/microbench/pchase.py | 15 +- gpucachesim/plot/__init__.py | 6 +- gpucachesim/plot/cache.py | 6 +- gpucachesim/plot/equations.py | 149 + gpucachesim/stats/__init__.py | 3306 +- gpucachesim/stats/agg.py | 192 + gpucachesim/stats/common.py | 13 +- gpucachesim/stats/load.py | 181 + gpucachesim/stats/metrics.py | 194 + gpucachesim/stats/native.py | 6 +- gpucachesim/stats/parallel_table.py | 1709 + gpucachesim/stats/result_table.py | 626 + gpucachesim/stats/speed_table.py | 361 + gpucachesim/stats/stats.py | 6 +- gpucachesim/utils.py | 236 + plot/.gitignore | 2 + test-apps/test-apps-materialized.yml | 76598 ++++++------------------- test-apps/test-apps.yml | 34 +- 22 files changed, 22180 insertions(+), 61554 deletions(-) create mode 100644 .ignore create mode 100644 gpucachesim/plot/equations.py create mode 100644 gpucachesim/stats/agg.py create mode 100644 gpucachesim/stats/load.py create mode 100644 gpucachesim/stats/metrics.py create mode 100644 gpucachesim/stats/parallel_table.py create mode 100644 gpucachesim/stats/result_table.py create mode 100644 gpucachesim/stats/speed_table.py diff --git a/.ignore b/.ignore new file mode 100644 index 00000000..a03501b6 --- /dev/null +++ b/.ignore @@ -0,0 +1,2 @@ +**/*.pdf +**/*.png diff --git a/Pipfile b/Pipfile index cb14fd68..56df9da5 100644 --- a/Pipfile +++ b/Pipfile @@ -40,6 +40,8 @@ pycryptosat = "*" logicmin = "*" pyeda = "*" pyarrow = "*" +Pygments = "*" +pathvalidate = "*" [dev-packages] invoke = "*" diff --git a/gpucachesim/benchmarks.py b/gpucachesim/benchmarks.py index a692db36..afd85696 100644 --- a/gpucachesim/benchmarks.py +++ b/gpucachesim/benchmarks.py @@ -132,9 
+132,7 @@ BENCH_TARGET_INDEX_COLS = ["target", "benchmark"] -PREVIEW_COLS = list( - BENCH_TARGET_INDEX_COLS + ["input_id"] + INDEX_COLS + SIMULATE_INPUT_COLS -) +PREVIEW_COLS = list(BENCH_TARGET_INDEX_COLS + ["input_id"] + INDEX_COLS + SIMULATE_INPUT_COLS) RATE_COLUMNS = [ "l2_hit_rate", @@ -237,9 +235,23 @@ def _map_dtype(dtype: str) -> str: assert len(missing_dtypes) == 0, "missing dtypes for {}".format(missing_dtypes) -CATEGORICAL_COLS = set( - [col for col, dtype in SPECIAL_DTYPES.items() if dtype == "category"] -) +CATEGORICAL_COLS = set([col for col, dtype in SPECIAL_DTYPES.items() if dtype == "category"]) + + +def benchmark_name_human_readable(name: str) -> str: + match name.lower(): + case "vectoradd": + return "VectorAdd" + case "matrixmul": + return "Matrixmul" + case "simple_matrixmul": + return "Naive Matrixmul" + case "transpose": + return "Transpose" + case "babelstream": + return "BabelStream" + case other: + return str(other) # def default_dtypes() -> typing.Dict[str, str]: @@ -449,18 +461,10 @@ def construct_playground_simulate_target_config(self, node): BenchmarkLoader.add_constructor("!Profile", construct_profile_target_config) BenchmarkLoader.add_constructor("!Trace", construct_trace_target_config) BenchmarkLoader.add_constructor("!Simulate", construct_simulate_target_config) -BenchmarkLoader.add_constructor( - "!ExecDrivenSimulate", construct_exec_driven_simulate_target_config -) -BenchmarkLoader.add_constructor( - "!AccelsimSimulate", construct_accelsim_simulate_target_config -) -BenchmarkLoader.add_constructor( - "!AccelsimTrace", construct_accelsim_trace_target_config -) -BenchmarkLoader.add_constructor( - "!PlaygroundSimulate", construct_playground_simulate_target_config -) +BenchmarkLoader.add_constructor("!ExecDrivenSimulate", construct_exec_driven_simulate_target_config) +BenchmarkLoader.add_constructor("!AccelsimSimulate", construct_accelsim_simulate_target_config) +BenchmarkLoader.add_constructor("!AccelsimTrace", 
construct_accelsim_trace_target_config) +BenchmarkLoader.add_constructor("!PlaygroundSimulate", construct_playground_simulate_target_config) class Benchmarks: @@ -504,24 +508,18 @@ def main(): @main.command() -@click.option( - "--path", default=DEFAULT_BENCH_FILE, help="Path to materialized benchmark config" -) +@click.option("--path", default=DEFAULT_BENCH_FILE, help="Path to materialized benchmark config") def count_bench_configs(path): print("loading", path) b = Benchmarks(path) benches = b.benchmarks[Target.Simulate.value] - total_bench_configs = sum( - [len(bench_configs) for bench_configs in benches.values()] - ) + total_bench_configs = sum([len(bench_configs) for bench_configs in benches.values()]) print("total bench configs: {}".format(total_bench_configs)) @main.command() -@click.option( - "--path", default=DEFAULT_BENCH_FILE, help="Path to materialized benchmark config" -) +@click.option("--path", default=DEFAULT_BENCH_FILE, help="Path to materialized benchmark config") @click.option("--baseline", type=bool, default=True, help="Baseline configurations") def table(path, baseline): print("loading", path) @@ -552,17 +550,13 @@ def is_baseline(config): return not baseline or all( [ config["values"].get("memory_only") in [False, None], - config["values"].get("num_clusters") - in [int(BASELINE["num_clusters"]), None], - config["values"].get("cores_per_cluster") - in [int(BASELINE["cores_per_cluster"]), None], + config["values"].get("num_clusters") in [int(BASELINE["num_clusters"]), None], + config["values"].get("cores_per_cluster") in [int(BASELINE["cores_per_cluster"]), None], config["values"].get("mode") in ["serial", None], ] ) - baseline_bench_configs = [ - config for config in bench_configs if is_baseline(config) - ] + baseline_bench_configs = [config for config in bench_configs if is_baseline(config)] print(bench_name) @@ -621,9 +615,7 @@ def is_baseline(config): @main.command() -@click.option( - "--path", default=DEFAULT_BENCH_FILE, help="Path to 
materialized benchmark config" -) +@click.option("--path", default=DEFAULT_BENCH_FILE, help="Path to materialized benchmark config") def list(path): print("loading", path) b = Benchmarks(path) @@ -633,9 +625,7 @@ def list(path): @main.command() -@click.option( - "--path", default=DEFAULT_BENCH_FILE, help="Path to materialized benchmark config" -) +@click.option("--path", default=DEFAULT_BENCH_FILE, help="Path to materialized benchmark config") def fix(path): print("loading", path) b = Benchmarks(path) @@ -705,9 +695,7 @@ def fix(path): pass try: - if (result_dir / "Simulate").is_dir() and ( - result_dir / "simulate" - ).is_dir(): + if (result_dir / "Simulate").is_dir() and (result_dir / "simulate").is_dir(): # Simulate is newer shutil.rmtree(result_dir / "simulate") os.rename(result_dir / "Simulate", result_dir / "simulate") diff --git a/gpucachesim/microbench/deprecated.py b/gpucachesim/microbench/deprecated.py index 28ef0b3b..a776d67b 100644 --- a/gpucachesim/microbench/deprecated.py +++ b/gpucachesim/microbench/deprecated.py @@ -487,9 +487,11 @@ def custom_dist(a, b): # "xor_bit": sym.logic.boolalg.Or(*[sym.logic.boolalg.And(term, sym.symbols(f"b{bit}")) for term in ff.args]), "xor_bit": sym.logic.boolalg.Or( *[ - sym.logic.boolalg.Xor(term, bit) - if contains_var(f.args[i], var=bit) - else term + ( + sym.logic.boolalg.Xor(term, bit) + if contains_var(f.args[i], var=bit) + else term + ) for i, term in enumerate(ff.args) ] ), @@ -892,9 +894,11 @@ def custom_dist(a, b): str( color( int(predicted), - fg="green" - if bool(predicted) == bool(target_bit) - else "red", + fg=( + "green" + if bool(predicted) == bool(target_bit) + else "red" + ), ) ), str(color("<==", fg="blue")) if addr in marks else "", diff --git a/gpucachesim/microbench/pchase.py b/gpucachesim/microbench/pchase.py index c4d6171c..244a0687 100644 --- a/gpucachesim/microbench/pchase.py +++ b/gpucachesim/microbench/pchase.py @@ -2321,6 +2321,15 @@ def find_cache_set_mapping( print(combined.head(n=10)) 
print(combined.shape) + combined_binary = combined.copy() + combined_binary["offset"] = combined_binary["offset"].apply(lambda x: np.binary_repr(x, width=num_sets_log2)) + combined_binary["set"] = combined_binary["set"].apply(lambda x: np.binary_repr(x, width=num_sets_log2)) + highest_virt_addr = combined_binary["virt_addr"].max() + combined_binary["virt_addr"] = combined_binary["virt_addr"].apply(lambda x: np.binary_repr(x, width=int(np.ceil(np.log2(highest_virt_addr))) + 1)) + print(combined_binary.head(n=10)) + print(combined_binary.shape) + + print(compute_set_probability(combined)) # return @@ -2977,9 +2986,9 @@ def is_periodic( last_unique_miss_cache_lines = unique_miss_cache_lines.copy() for miss_cache_line in unique_miss_cache_lines: - combined.loc[ - combined["cache_line"] == miss_cache_line, "mapped_set" - ] = set_idx + combined.loc[combined["cache_line"] == miss_cache_line, "mapped_set"] = ( + set_idx + ) cache_line_set_mapping.loc[ cache_line_set_mapping["cache_line"] == miss_cache_line, "mapped_set" diff --git a/gpucachesim/plot/__init__.py b/gpucachesim/plot/__init__.py index 5a29e558..38a1307e 100644 --- a/gpucachesim/plot/__init__.py +++ b/gpucachesim/plot/__init__.py @@ -5,6 +5,8 @@ from gpucachesim import REPO_ROOT_DIR PLOT_DIR = REPO_ROOT_DIR / "plot" +TABLE_DIR = REPO_ROOT_DIR / "plot/tables" +EQUATIONS_DIR = REPO_ROOT_DIR / "plot/equations" PLOTLY_PDF_OPTS = dict(format="pdf", scale=8) @@ -131,8 +133,6 @@ def human_format_thousands(num, round_to=2, variable_precision=False): ["", "K", "M", "G", "T", "P"][magnitude], ) return "{}{}".format( - round_to_precision_str( - num, round_to=round_to, variable_precision=variable_precision - ), + round_to_precision_str(num, round_to=round_to, variable_precision=variable_precision), ["", "K", "M", "G", "T", "P"][magnitude], ) diff --git a/gpucachesim/plot/cache.py b/gpucachesim/plot/cache.py index d8cd27c3..c875c00e 100644 --- a/gpucachesim/plot/cache.py +++ b/gpucachesim/plot/cache.py @@ -210,9 +210,9 @@ 
def title(id) -> str: alloc_times[int(partition_id), int(set_id), int(assoc_id), :] = row_df[ "sector_alloc_time" ].to_numpy() - last_access_times[ - int(partition_id), int(set_id), int(assoc_id), : - ] = row_df["last_sector_access_time"].to_numpy() + last_access_times[int(partition_id), int(set_id), int(assoc_id), :] = ( + row_df["last_sector_access_time"].to_numpy() + ) states = states.reshape((partitions, sets, -1)) allocations = allocations.reshape((partitions, sets, -1)) diff --git a/gpucachesim/plot/equations.py b/gpucachesim/plot/equations.py new file mode 100644 index 00000000..ab4a35c0 --- /dev/null +++ b/gpucachesim/plot/equations.py @@ -0,0 +1,149 @@ +import click +import gpucachesim.plot as plot +import gpucachesim.utils as utils +from wasabi import color + + +@click.group() +# @click.pass_context +def main(): + # ctx.ensure_object(dict) + pass + + +@main.command() +def equations(): + equations = [ + # ( + # "amdahl_n", + # r""" + # \begin{equation} + # n=8 + # \end{equation} + # """, + # ), + ( + "amdahl", + # 1 / ((1 - p) + p / n) + r""" +\begin{align*} +S_{\text{Amdahl}}(p, n) = \frac{1}{(1-p) + \frac{p}{n}} +\end{align*} + """, + ), + ( + "gustafson", + # 1 + (n - 1) * p + r""" +\begin{align*} +S_{\text{Gustafson}}(p, n) = 1 + (n-1)p +\end{align*} + """, + ), + ( + "amdahl_p_90", + r""" +\begin{align*} +S_{\text{Amdahl}}(0.90, 8) = 4.8 +\end{align*} +""", + ), + ( + "amdahl_p_83", + r""" +\begin{align*} +S_{\text{Amdahl}}(0.83, 8) = 3.6 +\end{align*} + """, + ), + ( + "gustafson_p_90", + r""" +\begin{align*} +S_{\text{Gustafson}}(0.90, 8) = 7.3 +\end{align*} + """, + ), + ( + "gustafson_p_83", + r""" +\begin{align*} +S_{\text{Gustafson}}(0.83, 8) = 6.8 +\end{align*} + """, + ), + ( + "offset_bit_0", + r"\begin{align*}" + + "\n" + + r"\Delta_0(a)=& (a_{10} \lor a_{12} \lor a_{14}) " + + r" \land (a_{10} \lor \neg a_{12} \lor \neg a_{14}) " + + r" \land (a_{12} \lor \neg a_{10} \lor \neg a_{14}) " + + r" \land (a_{14} \lor \neg a_{10} \lor \neg
a_{12}) \\" + + r" & \land (\neg a_{11} \lor \neg a_{13} \lor \neg a_{14}) " + + r" \land (\neg a_{12} \lor \neg a_{13} \lor \neg a_{14}) " + + r" \land (\neg a_{13} \lor \neg a_{14} \lor \neg a_{9}) " + + "\n" + + r"\end{align*}", + ), + # (b10 | b12 | b14) & (b10 | ~b12 | ~b14) & (b12 | ~b10 | ~b14) & (b14 | ~b10 | ~b12) & (~b11 | ~b13 | ~b14) & (~b12 | ~b13 | ~b14) & (~b13 | ~b14 | ~b9) + ( + "offset_bit_1", + r"\begin{align*}" + + "\n" + + r"\Delta_1(a)=(a_{11} \land a_{13} \land \neg a_{9}) " + + r"\lor (a_{11} \land a_{9} \land \neg a_{13}) " + + r"\lor (a_{13} \land a_{9} \land \neg a_{11}) " + + r"\lor (\neg a_{11} \land \neg a_{13} \land \neg a_{9}) " + + "\n" + + r"\end{align*}", + ), + # (b11 & b13 & ~b9) | (b11 & b9 & ~b13) | (b13 & b9 & ~b11) | (~b11 & ~b13 & ~b9 + ( + "offset_bit_xor", + r"\begin{align*}" + + "\n" + + r"\Delta_0(a)&=a_{10} \xor a_{12} \xor a_{14} \\" + + r"\Delta_1(a)&=\neg \Delta_0(a) \xor a_{9} \xor a_{10} \xor a_{11} \xor a_{12} \xor a_{13} \xor a_{14}" + + "\n" + + r"\end{align*}", + ), + ] + + for name, tex in equations: + tex_code = r""" +\documentclass[preview]{standalone} +""" + tex_code += utils.TEX_PACKAGES + tex_code += r""" +\begin{document} +""" + tex_code += tex + tex_code += r""" +\end{document} + """ + + assert isinstance(tex_code, str) + print(tex_code) + pdf_output_path = (plot.EQUATIONS_DIR / name).with_suffix(".pdf") + try: + utils.render_latex(tex_code, output_path=pdf_output_path) + pass + except Exception as e: + print(tex_code) + print("##################") + raise ValueError(tex_code) + # return + raise e + + print(color("wrote {}".format(pdf_output_path), fg="cyan")) + + png_output_path = (plot.EQUATIONS_DIR / "png" / name).with_suffix(".png") + utils.convert_to_png(input_path=pdf_output_path, output_path=png_output_path, density=600) + print(color("wrote {}".format(png_output_path), fg="cyan")) + + pass + + +if __name__ == "__main__": + main() diff --git a/gpucachesim/stats/__init__.py 
b/gpucachesim/stats/__init__.py index 9c405242..a137faef 100644 --- a/gpucachesim/stats/__init__.py +++ b/gpucachesim/stats/__init__.py @@ -25,9 +25,13 @@ import gpucachesim.stats.playground import gpucachesim.stats.common as common import gpucachesim.benchmarks as benchmarks +import gpucachesim.stats.parallel_table +import gpucachesim.stats.speed_table +import gpucachesim.stats.result_table import gpucachesim.plot as plot import gpucachesim.utils as utils +from gpucachesim.stats.load import load_stats from gpucachesim.benchmarks import ( Target, @@ -201,2991 +205,145 @@ def different_cols(df): return [col for col in df.columns if len(df[col].value_counts()) > 1] -class ParallelTableRow(typing.NamedTuple): - metric: str - threads: int - serial_value: typing.Optional[typing.Tuple[float, typing.Union[float, int, str]]] - det_value: typing.Optional[typing.Tuple[float, typing.Union[float, int, str]]] - nondet_values: typing.Sequence[typing.Tuple[float, typing.Union[float, int, str]]] - - def values(self): - values = [] - if self.serial_value is not None: - values.append(self.serial_value[0]) - if self.det_value is not None: - values.append(self.det_value[0]) - values += [v[0] for v in self.nondet_values] - return values - - -def build_parallel_table_rows( - df: pd.DataFrame, - num_bench_configs: int, - thousands_round_to=1, - variable_precision=True, -) -> typing.Sequence[ParallelTableRow]: - # interleave_n = list(itertools.product([False, True], [5, 10])) - run_ahead_values = [5, 10] - for run_ahead in run_ahead_values: - # print(df["input_run_ahead_parallel"].unique()) - assert run_ahead in df["input_run_ahead_parallel"].unique() - - table_rows: typing.Sequence[ParallelTableRow] = [] - - assert num_bench_configs > 0 - multiple_bench_configs = num_bench_configs > 1 - - for threads in [4, 8]: - threads_mask = df["input_threads_parallel"] == threads - det_mask = df["input_mode_parallel"] == "deterministic" - nondet_mask = df["input_mode_parallel"] == "nondeterministic" 
- - preview_cols = ( - benchmarks.BENCH_TARGET_INDEX_COLS - + ["kernel_name", "kernel_launch_id", "run"] - + list(copy.deepcopy(benchmarks.ALL_BENCHMARK_INPUT_COLS)) - + benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS - + [col + "_parallel" for col in benchmarks.SIMULATE_EXECUTION_CONFIG_COLS] - + [ - "exec_time_sec_parallel", - "input_id_parallel", - "input_id_serial", - "cycles_serial", - "cycles_parallel", - "cycles_mape", - # "dram_reads_serial", - # "dram_reads_parallel", - # "dram_reads_rel_err", - # "dram_writes_serial", - # "dram_writes_parallel", - # "dram_writes_rel_mape", - ] - # + different_cols(det) - ) - preview_cols = [col for col in preview_cols if col in df] - - all_parallel = df[(nondet_mask | det_mask) & threads_mask] - - # diff = set(preview_cols) - set(list(all_parallel.columns)) - # print(diff) - - # benchmarks.BENCH_TARGET_INDEX_COLS - # + ["kernel_name", "kernel_launch_id", "run"] - # + list(copy.deepcopy(benchmarks.ALL_BENCHMARK_INPUT_COLS)) - # + benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS - - print( - color( - "==> max speedup for {} threads is {}".format( - threads, all_parallel["exec_time_sec_speedup"].max() - ), - fg="green", - ) - ) - - weird_mask = all_parallel["exec_time_sec_speedup"] > threads - weird = all_parallel.loc[weird_mask, preview_cols] - if len(weird) > 0: - print( - color( - "WARNING: weird results for {} threads:".format(threads), fg="red" - ) - ) - print(weird.T) - print("===") - # assert len(weird) == 0 - - # nondet_no_interleave_mask = df["input_mode_parallel"] == "nondeterministic" - # nondet_interleave_mask = ( - # df["input_mode_parallel"] == "nondeterministic_interleave" - # ) - # print([m.sum() for m in [ - # mask, threads_mask, det_mask, nondet_no_interleave_mask, nondet_interleave_mask - # ]]) - - det = df[threads_mask & det_mask] - if False: - if num_bench_configs > 1: - print(det.loc[det["benchmark"] == "vectorAdd", preview_cols].T) - else: - print(det.loc[:, preview_cols].T) - - all_nondet = df[threads_mask & 
nondet_mask] - # nondet_no_interleave = df[threads_mask & nondet_no_interleave_mask] - # nondet_interleave = df[threads_mask & nondet_interleave_mask] - - print( - "num deterministic={} num nondeterministic={} num benchmark configs={}".format( - len(det), len(all_nondet), num_bench_configs - ) - ) - - # print(det) - assert len(det) == num_bench_configs - assert len(all_nondet) == len(run_ahead_values) * num_bench_configs - - # assert len(nondet_no_interleave) == 2 * num_bench_configs - # assert len(nondet_interleave) == 2 * num_bench_configs - # assert ( - # len( - # df[[ - # "exec_time_sec_serial", - # "cycles_serial", - # "input_id_serial", - # ]].drop_duplicates() - # ) - # == 1 - # ) - - parallel_preview_cols = list( - benchmarks.BENCH_TARGET_INDEX_COLS - + ["input_id_serial", "input_id_parallel"] - + benchmarks.INDEX_COLS - + [c for c in benchmarks.SIMULATE_INPUT_COLS] - + [c + "_parallel" for c in benchmarks.SIMULATE_INPUT_COLS] - + list(benchmarks.ALL_BENCHMARK_INPUT_COLS) - ) - parallel_preview_cols += [ - "total_cores_parallel", - "num_blocks_parallel", - "mean_blocks_per_sm_parallel", - "exec_time_sec_parallel", - "exec_time_sec_speedup", - ] - parallel_preview_cols = [col for col in parallel_preview_cols if col in df] - - spacer = " " + ("=" * 20) + " " - - # exec time (speedup) - serial_exec_time = df.loc[threads_mask, "exec_time_sec_serial"].mean() - det_exec_time = det["exec_time_sec_parallel"].mean() - det_speedup = det["exec_time_sec_speedup"].mean() - if multiple_bench_configs: - print("") - print( - spacer - + "DETERMINISTIC {} threads={}".format(det.shape, threads) - + spacer - ) - print(det[parallel_preview_cols][:8].T) - - # make sure we aggregate a single functional config only - assert det["input_cores_per_cluster"].nunique() == 1 - assert det["input_num_clusters"].nunique() == 1 - assert det["input_memory_only"].nunique() == 1 - - nondet_values = [] - # for interleave, n in interleave_n: - for run_ahead in run_ahead_values: - # nondet = 
nondet_interleave if interleave else nondet_no_interleave - # print("run ahead={}".format(run_ahead)) - nondet = all_nondet[all_nondet["input_run_ahead_parallel"] == run_ahead] - - print("") - print( - spacer - + "NONDETERMINISTIC {} threads={} run ahead={}".format( - nondet.shape, threads, run_ahead - ) - + spacer - ) - print(nondet[parallel_preview_cols][:8].T) - - # print(nondet.T) - # assert len(nondet) == 1 - assert len(nondet) == num_bench_configs - - nondet_exec_time = nondet["exec_time_sec_parallel"].mean() - nondet_speedup = nondet["exec_time_sec_speedup"].mean() - if multiple_bench_configs: - nondet_values.append( - ( - nondet_speedup, - "${}x$".format( - plot.round_to_precision_str( - nondet_speedup, - round_to=1, - variable_precision=variable_precision, - ) - ), - ) - ) - - else: - nondet_values.append( - ( - nondet_exec_time, - "${:>3.1f}s~({}x)$".format( - nondet_exec_time, - plot.round_to_precision_str( - nondet_speedup, - round_to=1, - variable_precision=variable_precision, - ), - ), - ) - ) - - serial_value = ( - None - if multiple_bench_configs - else (serial_exec_time, "${:>3.1f}s$".format(serial_exec_time)) - ) - if multiple_bench_configs: - det_value = ( - det_speedup, - "${}x$".format( - plot.round_to_precision_str( - det_speedup, round_to=1, variable_precision=variable_precision - ) - ), - ) - else: - det_value = ( - det_exec_time, - "${:>3.1f}s~({}x)$".format( - det_exec_time, - plot.round_to_precision_str( - det_speedup, round_to=1, variable_precision=variable_precision - ), - ), - ) - table_rows.append( - ParallelTableRow( - metric=r"exec\\time", - threads=threads, - serial_value=serial_value, - det_value=det_value, - nondet_values=nondet_values, - ) - ) - - # cycles (rel err) - serial_cycles = int(df.loc[threads_mask, "cycles_serial"].mean()) - det_cycles = int(det["cycles_parallel"].mean()) - det_rel_err = det["cycles_mape"].mean() - nondet_values = [] - # for interleave, n in interleave_n: - for run_ahead in run_ahead_values: - # nondet 
= nondet_interleave if interleave else nondet_no_interleave - nondet = all_nondet[all_nondet["input_run_ahead_parallel"] == run_ahead] - # assert len(nondet) == 1 - # assert len(nondet) == num_bench_configs - - nondet_cycles = int(nondet["cycles_parallel"].mean()) - nondet_rel_err = nondet["cycles_mape"].mean() - if multiple_bench_configs: - nondet_values.append( - ( - nondet_rel_err, - "${}\\%$".format( - plot.round_to_precision_str( - 100.0 * nondet_rel_err, - round_to=1, - variable_precision=variable_precision, - ) - ), - ) - ) - else: - nondet_values.append( - ( - nondet_cycles, - "${} ({}\\%)$".format( - plot.human_format_thousands( - nondet_cycles, - round_to=thousands_round_to, - variable_precision=variable_precision, - ), - plot.round_to_precision_str( - 100.0 * nondet_rel_err, - round_to=1, - variable_precision=variable_precision, - ), - ), - ) - ) - - serial_value = ( - None - if multiple_bench_configs - else ( - serial_cycles, - "${}$".format( - plot.human_format_thousands( - serial_cycles, - round_to=thousands_round_to, - variable_precision=variable_precision, - ) - ), - ) - ) - if multiple_bench_configs: - det_value = ( - 100.0 * det_rel_err, - "${}\\%$".format( - plot.round_to_precision_str( - 100.0 * det_rel_err, - round_to=1, - variable_precision=variable_precision, - ) - ), - ) - else: - det_value = ( - det_cycles, - "${} ({}\\%)$".format( - plot.human_format_thousands( - det_cycles, - round_to=thousands_round_to, - variable_precision=variable_precision, - ), - plot.round_to_precision_str( - 100.0 * det_rel_err, - round_to=1, - variable_precision=variable_precision, - ), - ), - ) - table_rows.append( - ParallelTableRow( - metric="cycles", - threads=threads, - serial_value=serial_value, - det_value=det_value, - nondet_values=nondet_values, - ) - ) - - # l1 data hit rate (rel err) - serial_l1_hit_rate = df.loc[threads_mask, "l1_hit_rate_serial"].mean() - det_l1_hit_rate = det["l1_hit_rate_parallel"].mean() - det_rel_err = 
det["l1_hit_rate_mae"].mean() - nondet_values = [] - # for interleave, n in interleave_n: - for run_ahead in run_ahead_values: - # nondet = nondet_interleave if interleave else nondet_no_interleave - nondet = all_nondet[all_nondet["input_run_ahead_parallel"] == run_ahead] - # assert len(nondet) == 1 - # assert len(nondet) == num_bench_configs - - nondet_l1_hit_rate = nondet["l1_hit_rate_parallel"].mean() - nondet_rel_err = nondet["l1_hit_rate_mae"].mean() - if multiple_bench_configs: - nondet_values.append( - ( - 100.0 * nondet_rel_err, - "${}\\%$".format( - plot.round_to_precision_str( - 100.0 * nondet_rel_err, - round_to=1, - variable_precision=variable_precision, - ), - ), - ) - ) - else: - nondet_values.append( - ( - 100.0 * nondet_l1_hit_rate, - "${}\\%~({}\\%)$".format( - plot.round_to_precision_str( - 100.0 * nondet_l1_hit_rate, - round_to=1, - variable_precision=variable_precision, - ), - plot.round_to_precision_str( - 100.0 * nondet_rel_err, - round_to=1, - variable_precision=variable_precision, - ), - ), - ) - ) - - serial_value = ( - None - if multiple_bench_configs - else ( - 100.0 * serial_l1_hit_rate, - "${:>2.1f}\\%$".format(100.0 * serial_l1_hit_rate), - ) - ) - if multiple_bench_configs: - det_value = ( - 100.0 * det_rel_err, - "${}\\%$".format( - plot.round_to_precision_str( - 100.0 * det_rel_err, - round_to=1, - variable_precision=variable_precision, - ), - ), - ) - else: - det_value = ( - 100.0 * det_l1_hit_rate, - "${}\\%~({}\\%)$".format( - plot.round_to_precision_str( - 100.0 * det_l1_hit_rate, - round_to=1, - variable_precision=variable_precision, - ), - plot.round_to_precision_str( - 100.0 * det_rel_err, - round_to=1, - variable_precision=variable_precision, - ), - ), - ) - - table_rows.append( - ParallelTableRow( - metric=r"L1D\\hit rate", - threads=threads, - serial_value=serial_value, - det_value=det_value, - nondet_values=nondet_values, - ) - ) - - # l2 data hit rate (rel err) - serial_l2_hit_rate = df.loc[threads_mask, 
"l2_hit_rate_serial"].mean() - det_l2_hit_rate = det["l2_hit_rate_parallel"].mean() - det_rel_err = det["l2_hit_rate_mae"].mean() - nondet_values = [] - # for interleave, n in interleave_n: - for run_ahead in run_ahead_values: - # nondet = nondet_interleave if interleave else nondet_no_interleave - nondet = all_nondet[all_nondet["input_run_ahead_parallel"] == run_ahead] - # assert len(nondet) == 1 - # assert len(nondet) == num_bench_configs - - nondet_l2_hit_rate = nondet["l2_hit_rate_parallel"].mean() - nondet_rel_err = nondet["l2_hit_rate_mae"].mean() - if multiple_bench_configs: - nondet_values.append( - ( - 100.0 * nondet_rel_err, - "${}\\%$".format( - plot.round_to_precision_str( - 100.0 * nondet_rel_err, - round_to=1, - variable_precision=variable_precision, - ), - ), - ) - ) - else: - nondet_values.append( - ( - 100.0 * nondet_l2_hit_rate, - "${}\\%~({}\\%)$".format( - plot.round_to_precision_str( - 100.0 * nondet_l2_hit_rate, - round_to=1, - variable_precision=variable_precision, - ), - plot.round_to_precision_str( - 100.0 * nondet_rel_err, - round_to=1, - variable_precision=variable_precision, - ), - ), - ) - ) - - serial_value = ( - None - if multiple_bench_configs - else ( - 100.0 * serial_l2_hit_rate, - "${}\\%$".format( - plot.round_to_precision_str( - 100.0 * serial_l2_hit_rate, - round_to=1, - variable_precision=variable_precision, - ) - ), - ) - ) - if multiple_bench_configs: - det_value = ( - 100.0 * det_rel_err, - "${}\\%$".format( - plot.round_to_precision_str( - 100.0 * det_rel_err, - round_to=1, - variable_precision=variable_precision, - ), - ), - ) - else: - det_value = ( - 100.0 * det_l2_hit_rate, - "${}\\%~({}\\%)$".format( - plot.round_to_precision_str( - 100.0 * det_l2_hit_rate, - round_to=1, - variable_precision=variable_precision, - ), - plot.round_to_precision_str( - 100.0 * det_rel_err, - round_to=1, - variable_precision=variable_precision, - ), - ), - ) - table_rows.append( - ParallelTableRow( - metric=r"L2D\\hit rate", - 
threads=threads, - serial_value=serial_value, - det_value=det_value, - nondet_values=nondet_values, - ) - ) - - # dram reads (rel err) - serial_dram_reads = int(df.loc[threads_mask, "dram_reads_serial"].mean()) - det_dram_reads = int(det["dram_reads_parallel"].mean()) - det_rel_err = det["dram_reads_smape"].mean() - nondet_values = [] - # for interleave, n in interleave_n: - for run_ahead in run_ahead_values: - # nondet = nondet_interleave if interleave else nondet_no_interleave - nondet = all_nondet[all_nondet["input_run_ahead_parallel"] == run_ahead] - # assert len(nondet) == 1 - # assert len(nondet) == num_bench_configs - - nondet_dram_reads = int(nondet["dram_reads_parallel"].mean()) - nondet_rel_err = nondet["dram_reads_smape"].mean() - if multiple_bench_configs: - nondet_values.append( - ( - nondet_rel_err, - "${}\\%$".format( - plot.round_to_precision_str( - 100.0 * nondet_rel_err, - round_to=1, - variable_precision=variable_precision, - ), - ), - ) - ) - else: - nondet_values.append( - ( - nondet_dram_reads, - "${} ({}\\%)$".format( - plot.human_format_thousands( - nondet_dram_reads, - round_to=thousands_round_to, - variable_precision=variable_precision, - ), - plot.round_to_precision_str( - 100.0 * nondet_rel_err, - round_to=1, - variable_precision=variable_precision, - ), - ), - ) - ) - - serial_value = ( - None - if multiple_bench_configs - else ( - serial_dram_reads, - "${}$".format( - plot.human_format_thousands( - serial_dram_reads, - round_to=thousands_round_to, - variable_precision=variable_precision, - ) - ), - ) - ) - if multiple_bench_configs: - det_value = ( - 100.0 * det_rel_err, - "${}\\%$".format( - plot.round_to_precision_str( - 100.0 * det_rel_err, - round_to=1, - variable_precision=variable_precision, - ), - ), - ) - else: - det_value = ( - det_dram_reads, - "${} ({}\\%)$".format( - plot.human_format_thousands( - det_dram_reads, - round_to=thousands_round_to, - variable_precision=variable_precision, - ), - plot.round_to_precision_str( - 
100.0 * det_rel_err, - round_to=1, - variable_precision=variable_precision, - ), - ), - ) - - table_rows.append( - ParallelTableRow( - metric=r"DRAM\\reads", - threads=threads, - serial_value=serial_value, - det_value=det_value, - nondet_values=nondet_values, - ) - ) - - # dram writes (rel err) - serial_dram_writes = int(df.loc[threads_mask, "dram_writes_serial"].mean()) - det_dram_writes = int(det["dram_writes_parallel"].mean()) - det_rel_err = det["dram_writes_smape"].mean() - nondet_values = [] - # for interleave, n in interleave_n: - for run_ahead in run_ahead_values: - # nondet = nondet_interleave if interleave else nondet_no_interleave - nondet = all_nondet[all_nondet["input_run_ahead_parallel"] == run_ahead] - # assert len(nondet) == 1 - # assert len(nondet) == num_bench_configs - - nondet_dram_writes = int(nondet["dram_writes_parallel"].mean()) - nondet_rel_err = nondet["dram_writes_smape"].mean() - if multiple_bench_configs: - nondet_values.append( - ( - 100.0 * nondet_rel_err, - "${}\\%$".format( - plot.round_to_precision_str( - 100.0 * nondet_rel_err, - round_to=1, - variable_precision=variable_precision, - ), - ), - ) - ) - else: - nondet_values.append( - ( - nondet_dram_writes, - "${} ({}\\%)$".format( - plot.human_format_thousands( - nondet_dram_writes, - round_to=thousands_round_to, - variable_precision=variable_precision, - ), - plot.round_to_precision_str( - 100.0 * nondet_rel_err, - round_to=1, - variable_precision=variable_precision, - ), - ), - ) - ) - - serial_value = ( - None - if multiple_bench_configs - else ( - serial_dram_writes, - "${}$".format( - plot.human_format_thousands( - serial_dram_writes, - round_to=thousands_round_to, - variable_precision=variable_precision, - ) - ), - ) - ) - if multiple_bench_configs: - det_value = ( - 100.0 * det_rel_err, - "${}\\%$".format( - plot.round_to_precision_str( - 100.0 * det_rel_err, - round_to=1, - variable_precision=variable_precision, - ), - ), - ) - else: - det_value = ( - det_dram_writes, - 
"${} ({}\\%)$".format( - plot.human_format_thousands( - det_dram_writes, - round_to=thousands_round_to, - variable_precision=variable_precision, - ), - plot.round_to_precision_str( - 100.0 * det_rel_err, - round_to=1, - variable_precision=variable_precision, - ), - ), - ) - table_rows.append( - ParallelTableRow( - metric=r"DRAM\\writes", - threads=threads, - serial_value=serial_value, - det_value=det_value, - nondet_values=nondet_values, - ) - ) - return table_rows - - -def slowdown(baseline, values): - return values / baseline - - -def speedup(baseline, values): - return baseline / values - - -def geo_mean(values: np.ndarray) -> np.ndarray: - a = np.array(values) - return a.prod() ** (1.0 / len(a)) - - -# def geo_mean(values: np.narray): -# return np.exp(np.log(values).mean()) - - -def bounded_relative_absolute_error( - true_values: np.ndarray, values: np.ndarray, **kwargs -) -> np.ndarray: - values = values.fillna(0.0) - true_values = true_values.fillna(0.0) - correct = values == true_values - - # we only deal with positive numbers - assert np.all(values >= 0.0) - assert np.all(true_values >= 0.0) - - brae = values.abs() / (values.abs() + true_values.abs()) - brae = brae.fillna(0.0) - # brae[brae] = 0.0 - brae[brae == 0.0] = 0.0 - return brae - - -def rel_err( - true_values: np.ndarray, values: np.ndarray, eps: typing.Optional[float] = None -) -> np.ndarray: - values = values.fillna(0.0) - true_values = true_values.fillna(0.0) - correct = values == true_values - - # we only deal with positive numbers - assert np.all(values >= 0.0) - assert np.all(true_values >= 0.0) - - # because we only use posive numbers, we can safely clip to a small positive epsilon - # if eps is not None: - # values = values + eps - # true_values = true_values + eps - # # true_values = np.clip(true_values, a_min=eps, a_max=None) - rel_err = (values - true_values).abs() / true_values - # rel_err = values.abs() / (values.abs() + true_values.abs()) - - # print(values) - # print(true_values) - # 
print(values == true_values) - rel_err = rel_err.fillna(0.0) - rel_err[correct] = 0.0 - rel_err[rel_err == 0.0] = 0.0 - - return rel_err - - -def rpd(true_values: np.ndarray, values: np.ndarray): - values = values.fillna(0.0) - true_values = true_values.fillna(0.0) - pass - # rel_err = (values - true_values).abs() / true_values - # rel_err = rel_err.fillna(0.0) - # rel_err[rel_err == 0.0] = 0.0 - # return rel_err - - -def mse(true_values, values) -> float: - values = values.fillna(0.0) - true_values = true_values.fillna(0.0) - return sklearn.metrics.mean_squared_error(true_values, values) - - -def rmse_real(true_values, values) -> float: - values = values.fillna(0.0) - true_values = true_values.fillna(0.0) - return ((values - true_values) ** 2).mean() ** 0.5 - - -def rmse(true_values, values) -> float: - values = values.fillna(0.0) - true_values = true_values.fillna(0.0) - diff = values - true_values - scale = values.abs() + true_values.abs() - return (diff / scale).mean() - - -def abs_err(true_values: np.ndarray, values: np.ndarray) -> np.array: - values = values.fillna(0.0) - true_values = true_values.fillna(0.0) - return (true_values - values).abs() - # return sklearn.metrics.mean_absolute_error(true_values, values) - - -def smape(true_values: np.ndarray, values: np.ndarray) -> float: - """SMAPE (symmetric)""" - values = values.fillna(0.0) - true_values = true_values.fillna(0.0) - - smape = (values - true_values).abs() / (values.abs() + true_values.abs()) - smape[values == true_values] = 0.0 - return smape.mean() - - -def ermsle(true_values: np.ndarray, values: np.ndarray) -> float: - """ERMSLE: Exponential root mean square log error""" - values = values.fillna(0.0) - true_values = true_values.fillna(0.0) - ratios = values / true_values - ratios[values == true_values] = 1.0 - - log_ratios = np.empty_like(ratios) - valid_mask = np.isfinite(ratios) & ratios != 0 - - # temp - ratios[~valid_mask] = 1.0 - log_ratios = np.abs(np.log(ratios)) ** 2 - # undo temp - 
log_ratios[~valid_mask] = np.nan - # mean - rmsle = np.sqrt(np.mean(log_ratios[valid_mask])) - # exponential - rmsle = np.abs(np.exp(rmsle)) - return rmsle - - -def emale(true_values: np.ndarray, values: np.ndarray) -> float: - """EMALE: Exponential mean absolute log error""" - values = values.fillna(0.0) - true_values = true_values.fillna(0.0) - ratios = values / true_values - ratios[values == true_values] = 1.0 - - log_ratios = np.empty_like(ratios) - valid_mask = np.isfinite(ratios) & ratios != 0 - - # temp - ratios[~valid_mask] = 1.0 - log_ratios = np.abs(np.log(ratios)) - # undo temp - log_ratios[~valid_mask] = np.nan - # mean - male = np.mean(log_ratios[valid_mask]) - # exponential - emale = np.abs(np.exp(male)) - return emale - - -def mape(true_values: np.ndarray, values: np.ndarray) -> np.array: - values = values.fillna(0.0) - true_values = true_values.fillna(0.0) - return sklearn.metrics.mean_absolute_percentage_error(true_values, values) - - -def correlation(true_values: np.ndarray, values: np.ndarray, atol=None) -> float: - values = values.fillna(0.0) - true_values = true_values.fillna(0.0) - # print("true values", true_values) - # print("values", values) - # print("values sum", values.sum()) - # print("values stddev", values.std()) - # print("true values stddev", true_values.std()) - # if values.sum() > 0 and : - assert np.all(np.isfinite(values)) - assert np.all(np.isfinite(true_values)) - - # this does not change anything about the std dev - # values += 1.0 - # true_values += 1.0 - - if values.std() != 0 and true_values.std() != 0: - return np.corrcoef(true_values, values)[0][1] - elif atol is not None and np.allclose( - np.amin([values, true_values], axis=0), - np.amax([values, true_values], axis=0), - atol=atol, - ): - return 1.0 - else: - assert len(values) == len(true_values) - assert len(np.amin([values, true_values], axis=0)) == len(values) - a = np.amin([values, true_values], axis=0) - b = np.amax([values, true_values], axis=0) - print(a, b) - 
print(np.abs(a - b)) - return np.nan - - -class TargetDataframes(typing.NamedTuple): - native_df: pd.DataFrame - accelsim_df: pd.DataFrame - serial_gpucachesim_df: pd.DataFrame - serial_gpucachesim_mem_only_df: pd.DataFrame - serial_gpucachesim_exec_driven_df: pd.DataFrame - parallel_gpucachesim_df: pd.DataFrame - - -def aggregate_mean_input_config_stats( - df: pd.DataFrame, - per_kernel=True, - mean=True, - inspect=False, -) -> typing.Tuple[pd.DataFrame, typing.List[str]]: - bench_input_cols = copy.deepcopy(list(benchmarks.ALL_BENCHMARK_INPUT_COLS)) - input_cols = copy.deepcopy(benchmarks.SIMULATE_INPUT_COLS) - input_config_group_cols = list( - benchmarks.BENCH_TARGET_INDEX_COLS - + input_cols - + bench_input_cols - + ["input_id"] - ) - input_config_group_cols = [col for col in input_config_group_cols if col in df] - - preview_cols = [ - "target", - "benchmark", - "input_id", - "run", - "kernel_launch_id", - "kernel_name", - "kernel_name_mangled", - ] - preview_cols += ["exec_time_sec"] - preview_cols += ["cycles"] - # print(df.loc[:,preview_cols][:]) - - if not per_kernel: - # sum metrics for all kernels per input_id and run - group_cols = input_config_group_cols + ["run"] - - # TODO: - # how do we deal with NOT summing cycles for kernel, while - # we want to sum the execution time - aggregations = { - **{c: "sum" for c in sorted(df.columns)}, - **{c: "mean" for c in benchmarks.RATE_COLUMNS}, - # **{c: "first" for c in bench_input_cols + input_cols}, - **benchmarks.NON_NUMERIC_COLS, - } - aggregations = { - col: agg - for col, agg in aggregations.items() - if col in df and not col in group_cols - } - - grouped = df.groupby(group_cols, dropna=False) - - def _inspect_per_config(df): - print("\nINSPECT: metrics (per input config, PER RUN)") - print(df.loc[:, preview_cols][:10]) - pass - - if inspect: - grouped[df.columns].apply(_inspect_per_config) - df = grouped.agg(aggregations).reset_index() - - # we no longer have kernels now - df["kernel_launch_id"] = np.nan - 
df["kernel_name"] = np.nan - df["kernel_name_mangled"] = np.nan - - # compute mean per input_id and kernel launch id over all runs - group_cols = input_config_group_cols + ["kernel_launch_id", "kernel_name"] - - if mean: - aggregations = { - **{c: "mean" for c in sorted(df.columns)}, - **{c: "first" for c in bench_input_cols + input_cols}, - **benchmarks.NON_NUMERIC_COLS, - } - aggregations = { - col: agg - for col, agg in aggregations.items() - if col in df and not col in group_cols - } - grouped = df.groupby(group_cols, dropna=False) - - def _inspect_per_config_per_kernel(df): - print("\nINSPECT: metrics (per input config, PER KERNEL)") - print(df.loc[:, preview_cols][:10]) - pass - - if inspect: - grouped[df.columns].apply(_inspect_per_config_per_kernel) - df = grouped.agg(aggregations).reset_index() - - return df.copy(), group_cols - - -class FunctionalConfig(typing.TypedDict): - num_clusters: int - cores_per_cluster: int - - -def split_into_target_dfs( - df, - per_kernel=False, - mean=False, - functional_config: typing.Optional[FunctionalConfig] = None, -) -> TargetDataframes: - df = df.reset_index() - - baseline_cores_per_cluster = benchmarks.BASELINE["cores_per_cluster"] - baseline_num_clusters = benchmarks.BASELINE["num_clusters"] - functional_config = FunctionalConfig( - cores_per_cluster=baseline_cores_per_cluster, - num_clusters=baseline_num_clusters, - ) - - def _label(label, shape): - return "{:>50}\t{}".format(label, shape) - - # native - native_mask = df["target"] == Target.Profile.value - native_df = df[native_mask] - native_df, _ = aggregate_mean_input_config_stats( - native_df, per_kernel=per_kernel, mean=mean - ) - print(_label("native", native_df.shape)) - - # accelsim - accelsim_mask = df["target"] == Target.AccelsimSimulate.value - accelsim_df = df[accelsim_mask] - accelsim_df, _ = aggregate_mean_input_config_stats( - accelsim_df, per_kernel=per_kernel, mean=mean - ) - print(_label("accelsim", accelsim_df.shape)) - - # gpucachesim (serial) - 
serial_gpucachesim_mask = df["target"] == Target.Simulate.value - serial_gpucachesim_mask &= df["input_mode"].isin(["serial", np.nan]) - serial_gpucachesim_mask &= df["input_memory_only"] == False - if functional_config is not None: - serial_gpucachesim_mask &= ( - df["input_cores_per_cluster"] == functional_config["cores_per_cluster"] - ) - serial_gpucachesim_mask &= ( - df["input_num_clusters"] == functional_config["num_clusters"] - ) - serial_gpucachesim_df = df[serial_gpucachesim_mask] - serial_gpucachesim_df, _ = aggregate_mean_input_config_stats( - serial_gpucachesim_df, per_kernel=per_kernel, mean=mean - ) - print(_label("serial gpucachesim", serial_gpucachesim_df.shape)) - - # gpucachesim (serial, mem only) - serial_gpucachesim_mem_only_mask = df["target"] == Target.Simulate.value - serial_gpucachesim_mem_only_mask &= df["input_memory_only"] == True - serial_gpucachesim_mem_only_mask &= df["input_mode"].isin(["serial", np.nan]) - if functional_config is not None: - serial_gpucachesim_mem_only_mask &= ( - df["input_cores_per_cluster"] == functional_config["cores_per_cluster"] - ) - serial_gpucachesim_mem_only_mask &= ( - df["input_num_clusters"] == functional_config["num_clusters"] - ) - serial_gpucachesim_mem_only_df = df[serial_gpucachesim_mem_only_mask] - serial_gpucachesim_mem_only_df, _ = aggregate_mean_input_config_stats( - serial_gpucachesim_mem_only_df, per_kernel=per_kernel, mean=mean - ) - print(_label("serial gpucachesim (mem only)", serial_gpucachesim_mem_only_df.shape)) - - # gpucachesim (serial, exec-driven) - serial_gpucachesim_exec_driven_mask = ( - df["target"] == Target.ExecDrivenSimulate.value - ) - # print("mask num", sum(serial_gpucachesim_exec_driven_mask)) - # print(df.loc[serial_gpucachesim_exec_driven_mask, ["target", "input_memory_only", "input_mode"]]) - serial_gpucachesim_exec_driven_mask &= df["input_mode"].isin(["serial", "", np.nan]) - serial_gpucachesim_exec_driven_df = df[serial_gpucachesim_exec_driven_mask] - 
serial_gpucachesim_exec_driven_df, _ = aggregate_mean_input_config_stats( - serial_gpucachesim_exec_driven_df, per_kernel=per_kernel, mean=mean - ) - print( - _label( - "serial gpucachesim (exec driven)", serial_gpucachesim_exec_driven_df.shape - ) - ) - - # gpucachesim (parallel) - parallel_gpucachesim_mask = df["target"] == Target.Simulate.value - parallel_gpucachesim_mask &= df["input_mode"] != "serial" - parallel_gpucachesim_mask &= df["input_memory_only"] == False - if functional_config is not None: - parallel_gpucachesim_mask &= ( - df["input_cores_per_cluster"] == functional_config["cores_per_cluster"] - ) - parallel_gpucachesim_mask &= ( - df["input_num_clusters"] == functional_config["num_clusters"] - ) - parallel_gpucachesim_df = df[parallel_gpucachesim_mask] - parallel_gpucachesim_df, _ = aggregate_mean_input_config_stats( - parallel_gpucachesim_df, per_kernel=per_kernel, mean=mean - ) - print(_label("parallel gpucachesim", parallel_gpucachesim_df.shape)) - - return TargetDataframes( - native_df=native_df, - accelsim_df=accelsim_df, - serial_gpucachesim_df=serial_gpucachesim_df, - serial_gpucachesim_mem_only_df=serial_gpucachesim_mem_only_df, - serial_gpucachesim_exec_driven_df=serial_gpucachesim_exec_driven_df, - parallel_gpucachesim_df=parallel_gpucachesim_df, - ) - - -def choose_fastest_parallel_implementation(df) -> pd.DataFrame: - bench_input_cols = copy.deepcopy(list(benchmarks.ALL_BENCHMARK_INPUT_COLS)) - # note, we do NOT group by SIMULATE_EXECUTION_CONFIG_COLS or SIMULATE_INPUT_COLS. 
- # this means we do NOT group on input_mode, input_run_ahead, or input_threads - functinoal_input_cols = copy.deepcopy(benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS) - input_config_group_cols = ( - ["target", "benchmark"] + functinoal_input_cols + bench_input_cols - ) - input_config_group_cols = [col for col in input_config_group_cols if col in df] - - group_cols = input_config_group_cols + ["run"] - min_exec_times = df.groupby(group_cols, dropna=False)["exec_time_sec"].transform( - "min" - ) - df = df[df["exec_time_sec"] == min_exec_times] - return df - - -@main.command() -# @click.pass_context -@click.option("-p", "--path", help="Path to materialized benchmark config") -@click.option("-b", "--bench", "bench_name_arg", help="Benchmark name") -@click.option("--nsight", "nsight", type=bool, is_flag=True, help="use nsight") -@click.option( - "--mean-time", - "include_mean_time", - type=bool, - is_flag=True, - help="include mean time", -) -@click.option( - "-v", "--vebose", "verbose", type=bool, is_flag=True, help="enable verbose output" -) -def speed_table(bench_name_arg, path, nsight, verbose, include_mean_time): - profiler = "nsight" if nsight else "nvprof" - selected_df = load_stats(bench_name=bench_name_arg, profiler=profiler, path=path) - - # remove non-kernel results - no_kernel_mask = selected_df["kernel_name"].isna() - selected_df = selected_df[~no_kernel_mask] - - # print(selected_df.loc[ - # (selected_df["target"] == Target.Simulate.value) - # & (selected_df["input_id"] == 210), - # benchmarks.PREVIEW_COLS + ["cycles", "exec_time_sec"]].T) - - # print(selected_df.loc[ - # (selected_df["target"] == Target.AccelsimSimulate.value) - # & (selected_df["input_id"] == 3), - # benchmarks.PREVIEW_COLS + ["cycles", "exec_time_sec"]].T) - - target_dfs = split_into_target_dfs(selected_df, per_kernel=False, mean=True) - - # print(target_dfs.serial_gpucachesim_df.loc[ - # target_dfs.serial_gpucachesim_df["input_id"] == 210, - # benchmarks.PREVIEW_COLS + ["cycles", 
"exec_time_sec"]].T) - - # print(target_dfs.accelsim_df.loc[ - # target_dfs.accelsim_df["input_id"] == 3, - # benchmarks.PREVIEW_COLS + ["cycles", "exec_time_sec"]].T) - - native_df = target_dfs.native_df - accelsim_df = target_dfs.accelsim_df - serial_gpucachesim_df = target_dfs.serial_gpucachesim_df - serial_gpucachesim_mem_only_df = target_dfs.serial_gpucachesim_mem_only_df - serial_gpucachesim_exec_driven_df = target_dfs.serial_gpucachesim_exec_driven_df - parallel_gpucachesim_df = choose_fastest_parallel_implementation( - target_dfs.parallel_gpucachesim_df - ) - print( - "{:>50}\t{}".format( - "fastest parallel gpucachesim", parallel_gpucachesim_df.shape - ) - ) - - benches = sorted(selected_df["benchmark"].unique().tolist()) - - # dtypes = { - # **{col: "float64" for col in native_df.columns}, - # **{col: "object" for col in benchmarks.NON_NUMERIC_COLS.keys()}, - # } - # dtypes = {col: dtype for col, dtype in dtypes.items() if col in native_df} - # native_df = native_df.astype(dtypes) - - dtypes = dict() - sim_targets = { - "_accelsim": accelsim_df.astype(dtypes), - "_gpucachesim": serial_gpucachesim_df.astype(dtypes), - "_gpucachesim_mem_only": serial_gpucachesim_mem_only_df.astype(dtypes), - "_gpucachesim_exec_driven": serial_gpucachesim_exec_driven_df.astype(dtypes), - "_gpucachesim_parallel": parallel_gpucachesim_df.astype(dtypes), - } - - print("\n") - - for suffix, sim_df in sim_targets.items(): - print("computing =>", suffix) - # print(sim_df[benchmarks.PREVIEW_COLS][:4].T) - join_cols = list( - # we do NOT join based on target - ["benchmark", "kernel_launch_id"] - + list(benchmarks.ALL_BENCHMARK_INPUT_COLS) - # we do NOT join based on input_memory_only - + ["input_num_clusters", "input_cores_per_cluster"], - ) - join_cols = [col for col in join_cols if col in selected_df] - # pprint(join_cols) - - missing_df = ( - native_df[join_cols] - .merge( - sim_df[join_cols], - how="left", - indicator=True, - ) - .loc[lambda x: x["_merge"] != "both"] - ) - if 
len(missing_df) > 0: - if suffix == "_gpucachesim_parallel": - # temp: ignore for now - pass - elif suffix == "_gpucachesim_exec_driven": - # we do not have an exec driven version of babelstream - missing_exec_driven_benches = sorted( - missing_df["benchmark"].unique().tolist() - ) - if missing_exec_driven_benches != ["babelstream"]: - print("MISSING {}".format(missing_df.shape)) - print(missing_df) - raise ValueError( - "missing exec driven {} but should only miss babelstream".format( - missing_exec_driven_benches - ) - ) - else: - print("MISSING {}".format(missing_df.shape)) - print(missing_df) - assert len(missing_df) == 0 - - joined_df = native_df.merge( - sim_df, - on=join_cols, - how="left", - suffixes=(None, suffix), - ) - assert joined_df.shape[0] == native_df.shape[0] - if len(joined_df) == 0: - raise ValueError("joined dataframe is empty") - - native_df = joined_df - - native_df["exec_time_nsec"] = native_df["exec_time_sec"] * 1e9 - # preview_metrics = ["cycles", "instructions", "exec_time_sec", "input_id"] - preview_metrics = ["input_id", "kernel_name", "exec_time_sec"] - preview_cols = ["benchmark", "exec_time_nsec"] + [ - col + suffix - for col, suffix in itertools.product( - preview_metrics, [""] + list(sim_targets.keys()) - ) - ] - - table = "" - for bench in benches + [None]: - print(bench) - if bench is None: - bench_name = "Combined" - else: - match bench.lower(): - case "vectoradd": - bench_name = "VectorAdd" - case "matrixmul": - bench_name = "Matrixmul" - case "simple_matrixmul": - bench_name = "Naive Matrixmul" - case "transpose": - bench_name = "Transpose" - case "babelstream": - bench_name = "BabelStream" - case other: - bench_name = str(other) - - table += r"\rowcolor{gray!10}" - table += r"\multicolumn{6}{c}{\textbf{" + bench_name + r"}} \\" - if bench is None: - table += r"\hline \hline" - else: - table += r"\hline" - table += "\n" - - # for metric in metrics: - if bench is not None: - bench_df = native_df[native_df["benchmark"] == bench] 
- else: - bench_df = native_df - - bench_df = bench_df.copy() - if verbose: - print(bench_df[preview_cols + benchmarks.BENCHMARK_INPUT_COLS[bench]]) - print(bench_df.shape) - - table += r"Slowdown" - slowdowns_over_native = np.array( - [ - slowdown( - baseline=bench_df["exec_time_sec"], - values=bench_df["exec_time_sec_accelsim"], - ), - slowdown( - baseline=bench_df["exec_time_sec"], - values=bench_df["exec_time_sec_gpucachesim"], - ), - slowdown( - baseline=bench_df["exec_time_sec"], - values=bench_df["exec_time_sec_gpucachesim_mem_only"], - ), - slowdown( - baseline=bench_df["exec_time_sec"], - values=bench_df["exec_time_sec_gpucachesim_exec_driven"], - ), - slowdown( - baseline=bench_df["exec_time_sec"], - values=bench_df["exec_time_sec_gpucachesim_parallel"], - ), - ] - ) - if bench is None: - slowdowns_over_native = np.nanmean(slowdowns_over_native, axis=1) - else: - slowdowns_over_native = np.mean(slowdowns_over_native, axis=1) - for slowdown_value in slowdowns_over_native: - table += " & " - if np.isnan(slowdown_value): - continue - bold = np.isfinite(slowdown_value) and slowdown_value == np.nanmin( - slowdowns_over_native - ) - if bold: - table += r"\boldmath" - table += "${}$".format(plot.human_format_thousands(slowdown_value)) - table += r"\\" + "\n" - - table += r"KIPS" - native_kilo_instructions = bench_df["instructions"] / 1000.0 - kips = np.array( - [ - native_kilo_instructions / bench_df["exec_time_sec_accelsim"], - native_kilo_instructions / bench_df["exec_time_sec_gpucachesim"], - (bench_df["instructions_gpucachesim_mem_only"] / 1000.0) - / bench_df["exec_time_sec_gpucachesim_mem_only"], - (bench_df["instructions_gpucachesim_exec_driven"] / 1000.0) - / bench_df["exec_time_sec_gpucachesim_exec_driven"], - native_kilo_instructions - / bench_df["exec_time_sec_gpucachesim_parallel"], - ] - ) - - # print("kips:") - # print(kips) - if bench is None: - kips = np.nanmean(kips, axis=1) - else: - kips = np.mean(kips, axis=1) - for kips_value in kips: - 
table += " & " - if np.isnan(kips_value): - continue - bold = np.isfinite(kips_value) and kips_value == np.nanmax(kips) - if bold: - table += r"\boldmath" - table += "${}$".format(plot.human_format_thousands(kips_value)) - - if include_mean_time: - table += r"\\" + "\n" - table += r"mean time" - mean_time = np.array( - [ - bench_df["exec_time_sec_accelsim"], - bench_df["exec_time_sec_gpucachesim"], - bench_df["exec_time_sec_gpucachesim_mem_only"], - bench_df["exec_time_sec_gpucachesim_exec_driven"], - bench_df["exec_time_sec_gpucachesim_parallel"], - ] - ) - if bench is None: - mean_time = np.nanmean(mean_time, axis=1) - else: - mean_time = np.mean(mean_time, axis=1) - for mean_time_value in mean_time: - table += " & " - if np.isnan(mean_time_value): - continue - bold = np.isfinite(mean_time_value) and mean_time_value == np.nanmin( - mean_time - ) - if bold: - table += r"\boldmath" - table += "${:5.1f}s$".format(mean_time_value) - # table += r"\\" + "\n" - - table += r"\\" - # if bench is not None: - table += r" \hline" - table += "\n" - table += "% \n" - - table += "%\n%\n" - - print(table) - utils.copy_to_clipboard(table) - print("copied table to clipboard") - - -class ErrorMetric(enum.Enum): - MAPE = "MAPE" - SMAPE = "SMAPE" - MAE = "MAE" - Correlation = "Corr." - EMALE = "EMALE" - ERMSLE = "ERMSLE" - # RelErr = "Rel err." 
- - # MAPE = ("mape", "MAPE") - # Correlation = ("corr", "Corr.") - # RelErr = ("rel_err", "Rel err.") - - -# from collections import namedtuple - -# ErrorMetric = namedtuple('ErrorMetric', ['value', 'label', 'column']) - -# class ErrorMetrics(enum.Enum): -# -# @property -# def column(self): -# return self.value.column -# -# yellow = ErrorMetric(1, 'Yellow') -# green = Color(2, 'Green') - - -@main.command() -# @click.pass_context -@click.option("--path", help="Path to materialized benchmark config") -@click.option("--bench", "bench_name_arg", help="Benchmark name") -@click.option("--metric", "metric_arg", type=str, help="metric") -@click.option("--nsight", "nsight", type=bool, is_flag=True, help="use nsight") -@click.option( - "-v", "--vebose", "verbose", type=bool, is_flag=True, help="enable verbose output" -) -def result_table(path, bench_name_arg, metric_arg, nsight, verbose): - profiler = "nsight" if nsight else "nvprof" - selected_df = load_stats(bench_name=bench_name_arg, profiler=profiler, path=path) - - # remove non-kernel results - selected_df = selected_df[~selected_df["kernel_name"].isna()] - - # target benchmark histogram - target_bench_input_count_hist = ( - selected_df[["target", "benchmark", "input_id"]] - .drop_duplicates() - .value_counts(["target", "benchmark"], dropna=False) - .sort_index() - ) - print(target_bench_input_count_hist) - - target_dfs = split_into_target_dfs(selected_df, per_kernel=False, mean=True) - native_df = target_dfs.native_df - accelsim_df = target_dfs.accelsim_df - serial_gpucachesim_df = target_dfs.serial_gpucachesim_df - serial_gpucachesim_mem_only_df = target_dfs.serial_gpucachesim_mem_only_df - serial_gpucachesim_exec_driven_df = target_dfs.serial_gpucachesim_exec_driven_df - - class Metric(typing.TypedDict): - label: str - is_percent: bool - error_metrics: typing.Sequence[typing.Tuple[str, ErrorMetric]] - - benches = sorted(selected_df["benchmark"].unique().tolist()) - all_metrics = [ - Metric( - label="Cycles", - 
is_percent=False, - error_metrics=[ - # ("cycles", ErrorMetric.RelErr), - ("cycles", ErrorMetric.EMALE), - ("cycles", ErrorMetric.ERMSLE), - ("cycles", ErrorMetric.SMAPE), - ("cycles", ErrorMetric.MAPE), - ("cycles", ErrorMetric.Correlation), - ], - ), - Metric( - label="DRAM reads", - is_percent=False, - error_metrics=[ - ("dram_reads", ErrorMetric.EMALE), - ("dram_reads_percent", ErrorMetric.MAPE), - ("dram_reads", ErrorMetric.Correlation), - ], - ), - Metric( - label="DRAM writes", - is_percent=False, - error_metrics=[ - ("dram_writes", ErrorMetric.EMALE), - ("dram_writes_percent", ErrorMetric.MAPE), - ("dram_writes", ErrorMetric.Correlation), - ], - ), - Metric( - label="L1 Accesses", - is_percent=False, - error_metrics=[ - ("l1_accesses", ErrorMetric.EMALE), - ("l1_accesses", ErrorMetric.MAPE), - ("l1_accesses", ErrorMetric.Correlation), - ], - ), - Metric( - label="L2 Accesses", - is_percent=False, - error_metrics=[ - ("l2_accesses", ErrorMetric.EMALE), - ("l2_accesses", ErrorMetric.MAPE), - ("l2_accesses", ErrorMetric.Correlation), - ], - ), - Metric( - label="L2 reads", - is_percent=False, - error_metrics=[ - ("l2_reads", ErrorMetric.EMALE), - ("l2_reads", ErrorMetric.MAPE), - ("l2_reads", ErrorMetric.Correlation), - ], - ), - Metric( - label="L2 writes", - is_percent=False, - error_metrics=[ - ("l2_writes", ErrorMetric.EMALE), - ("l2_writes", ErrorMetric.MAPE), - ("l2_writes", ErrorMetric.Correlation), - ], - ), - Metric( - label="L1D hitrate", - is_percent=True, - error_metrics=[ - ("l1_global_hit_rate", ErrorMetric.EMALE), - ("l1_global_hit_rate", ErrorMetric.MAE), - ("l1_global_hit_rate", ErrorMetric.Correlation), - ], - ), - Metric( - label="L2D hitrate", - is_percent=True, - error_metrics=[ - ("l2_hit_rate", ErrorMetric.EMALE), - ("l2_hit_rate", ErrorMetric.MAE), - ("l2_hit_rate", ErrorMetric.Correlation), - ], - ), - Metric( - label="L2D read hitrate", - is_percent=True, - error_metrics=[ - ("l2_read_hit_rate", ErrorMetric.EMALE), - 
("l2_read_hit_rate", ErrorMetric.MAE), - ("l2_read_hit_rate", ErrorMetric.Correlation), - ], - ), - Metric( - label="L2D write hitrate", - is_percent=True, - error_metrics=[ - ("l2_write_hit_rate", ErrorMetric.EMALE), - ("l2_write_hit_rate", ErrorMetric.MAE), - ("l2_write_hit_rate", ErrorMetric.Correlation), - ], - ), - ] - - # metrics = [dram_reads] - # metrics = [dram_writes] - # metrics = [l1_accesses] - # metrics = [l2_accesses] - # metrics = [ - # l1_hit_rate, - # l2_hit_rate, - # l1_accesses, - # l2_accesses, - # cycles, - # dram_reads, - # dram_writes, - # ] - # metrics = [l1_hit_rate] - # metrics = [l2_hit_rate] - # metrics = [cycles] - - if metric_arg is None: - metrics = all_metrics[:1] - else: - metrics = [ - m - for m in all_metrics - if metric_arg.replace(" ", "").lower() - == m["label"].replace(" ", "").lower() - ] - if len(metrics) == 0: - raise ValueError( - "no metric named {}, have {}", - metric_arg, - [m["label"].replace(" ", "").lower() for m in all_metrics], - ) - - print("\n") - print( - "computing {} metrics: {} for {} benches: {}".format( - len(metrics), [m["label"] for m in metrics], len(benches), benches - ) - ) - - # dtypes = { - # **{col: "float64" for col in native_df.columns}, - # **{col: "object" for col in benchmarks.NON_NUMERIC_COLS.keys()}, - # } - # dtypes = {col: dtype for col, dtype in dtypes.items() if col in native_df} - # native_df = native_df.astype(dtypes) - - dtypes = dict() - sim_targets = { - "_accelsim": accelsim_df.astype(dtypes), - "_gpucachesim": serial_gpucachesim_df.astype(dtypes), - "_gpucachesim_mem_only": serial_gpucachesim_mem_only_df.astype(dtypes), - "_gpucachesim_exec_driven": serial_gpucachesim_exec_driven_df.astype(dtypes), - } - - for suffix, sim_df in sim_targets.items(): - print("computing =>", suffix) - # print(sim_df[benchmarks.PREVIEW_COLS][:4].T) - join_cols = list( - # we do NOT join based on target - ["benchmark", "kernel_launch_id"] - + list(benchmarks.ALL_BENCHMARK_INPUT_COLS) - # we do NOT join 
based on input_memory_only - + ["input_num_clusters", "input_cores_per_cluster"], - ) - join_cols = [col for col in join_cols if col in selected_df] - # pprint(join_cols) - - missing_df = ( - native_df[join_cols] - .merge( - sim_df[join_cols], - how="left", - indicator=True, - ) - .loc[lambda x: x["_merge"] != "both"] - ) - if len(missing_df) > 0: - if suffix == "_gpucachesim_parallel": - # temp: ignore for now - pass - elif suffix == "_gpucachesim_exec_driven": - # we do not have an exec driven version of babelstream - missing_exec_driven_benches = sorted( - missing_df["benchmark"].unique().tolist() - ) - if missing_exec_driven_benches != ["babelstream"]: - print("MISSING {}".format(missing_df.shape)) - print(missing_df) - raise ValueError( - "missing exec driven {} but should only miss babelstream".format( - missing_exec_driven_benches - ) - ) - else: - print("MISSING {}".format(missing_df.shape)) - print(missing_df) - assert len(missing_df) == 0 - - joined_df = native_df.merge( - sim_df, - on=join_cols, - how="left", - suffixes=(None, suffix), - ) - assert joined_df.shape[0] == native_df.shape[0] - if len(joined_df) == 0: - raise ValueError("joined dataframe is empty") - - native_df = joined_df - # break - - for suffix in list(sim_targets.keys()) + [""]: - native_df["dram_reads_percent" + suffix] = native_df[ - "dram_reads" + suffix - ].fillna(0.0) - scale = ( - native_df[["num_global_loads", "num_global_stores"]].max(axis=1) + 0.00001 - ) - native_df["dram_reads_percent" + suffix] /= scale - native_df["dram_writes_percent" + suffix] = native_df[ - "dram_writes" + suffix - ].fillna(0.0) - native_df["dram_writes_percent" + suffix] /= scale - assert (native_df["dram_writes_percent" + suffix] <= 1.0).all() - assert (native_df["dram_reads_percent" + suffix] <= 1.0).all() - - assert all( - [ - col in native_df - for col, _ in utils.flatten([m["error_metrics"] for m in metrics]) - ] - ) - - # preview_cols = [ - # "benchmark", - # "input_id", - # "num_global_loads", - 
# "num_global_stores", - # ] + [ - # col + suffix - # for col, suffix in itertools.product( - # # ["cycles"], - # # ["dram_writes", "dram_writes_percent"], - # # ["dram_reads", "dram_reads_percent"], - # ["l1_accesses"], - # # [""] + list(sim_targets.keys()) - # ["", "_accelsim", "_gpucachesim"], - # ) - # ] - # print(native_df[preview_cols]) - - for metric in metrics: - metric_cols = sorted( - list(set([metric_col for metric_col, _ in metric["error_metrics"]])) - ) - print("==> PREVIEW: {}".format(metric_cols)) - preview_cols = [ - "benchmark", - "input_id", - # "num_global_loads", - # "num_global_stores", - ] + [ - col + suffix - for col, suffix in itertools.product( - metric_cols, - [""] + list(sim_targets.keys()), - # ["", "_accelsim", "_gpucachesim"], - ) - ] - print(native_df[preview_cols]) - - table = "" - for bench in benches + [None]: - if bench is None: - bench_name = "Combined" - else: - match bench.lower(): - case "vectoradd": - bench_name = "VectorAdd" - case "matrixmul": - bench_name = "Matrixmul" - case "simple_matrixmul": - bench_name = "Naive Matrixmul" - case "transpose": - bench_name = "Transpose" - case "babelstream": - bench_name = "BabelStream" - case other: - bench_name = str(other) - - table += r"\rowcolor{gray!10}" - table += r"\multicolumn{6}{c}{\textbf{" + bench_name + r"}} \\" - if bench is None: - table += r"\hline \hline" - else: - table += r"\hline" - table += "\n" - - for metric in metrics: - print(bench, metric["label"]) - - if bench is not None: - bench_df = native_df[native_df["benchmark"] == bench] - else: - bench_df = native_df - # continue - - table += r"\multirow{" + str(len(metric["error_metrics"])) + "}{*}{" - table += " ".join(str(metric["label"]).split("_")) - table += "} \n" - - for metric_col, error_metric in metric["error_metrics"]: - preview_cols = ["benchmark"] + [ - col + suffix - for col, suffix in itertools.product( - [metric_col], [""] + list(sim_targets.keys()) - ) - ] - - bench_df = bench_df.copy() - if bench is 
not None and verbose: - print( - bench_df[ - preview_cols + benchmarks.BENCHMARK_INPUT_COLS[bench] - ].fillna(0.0) - ) - print(bench_df.shape) - - error_values: pd.DataFrame - - metric_is_percent = metric["is_percent"] - value_scale = 100.0 if metric_is_percent else 1.0 - - match error_metric: - case ErrorMetric.Correlation: - error_values = [] - for suffix in sim_targets.keys(): - true_values = bench_df[metric_col] * value_scale - values = bench_df[metric_col + suffix] * value_scale - atol = 1.0 if metric_is_percent else 0.1 - error = correlation( - true_values=true_values, values=values, atol=atol - ) - bench_df[ - metric_col + "_" + error_metric.name.lower() + suffix - ] = error - error_values.append(error) - error_values = pd.DataFrame(error_values) - error_values = error_values.mean(axis=1) - - # case ErrorMetric.RelErr: - # error_values = [] - # for suffix in sim_targets.keys(): - # true_values=bench_df[metric_col] - # values=bench_df[metric_col + suffix] - # error = rel_err(true_values=true_values, values=values) - # bench_df[metric_col + "_" + error_metric.name.lower() + suffix] = error - # error_values.append(error) - # error_values = pd.DataFrame(error_values) - # error_values = error_values.mean(axis=1) - # # error_values *= 100.0 - - case ErrorMetric.EMALE: - error_values = [] - for suffix in sim_targets.keys(): - true_values = bench_df[metric_col] * value_scale - values = bench_df[metric_col + suffix] * value_scale - error = emale(true_values=true_values, values=values) - bench_df[ - metric_col + "_" + error_metric.name.lower() + suffix - ] = error - error_values.append(error) - error_values = pd.DataFrame(error_values) - error_values = error_values.mean(axis=1) - - case ErrorMetric.ERMSLE: - error_values = [] - for suffix in sim_targets.keys(): - true_values = bench_df[metric_col] * value_scale - values = bench_df[metric_col + suffix] * value_scale - error = ermsle(true_values=true_values, values=values) - bench_df[ - metric_col + "_" + 
error_metric.name.lower() + suffix - ] = error - error_values.append(error) - error_values = pd.DataFrame(error_values) - error_values = error_values.mean(axis=1) - - case ErrorMetric.MAE: - error_values = [] - for suffix in sim_targets.keys(): - true_values = bench_df[metric_col] * value_scale - values = bench_df[metric_col + suffix] * value_scale - error = abs_err(true_values=true_values, values=values) - bench_df[ - metric_col + "_" + error_metric.name.lower() + suffix - ] = error - error_values.append(error) - error_values = pd.DataFrame(error_values) - error_values = error_values.mean(axis=1) - - case ErrorMetric.SMAPE: - error_values = [] - for suffix in sim_targets.keys(): - true_values = bench_df[metric_col] * value_scale - values = bench_df[metric_col + suffix] * value_scale - error = smape(true_values=true_values, values=values) - bench_df[ - metric_col + "_" + error_metric.name.lower() + suffix - ] = error - error_values.append(error) - error_values = pd.DataFrame(error_values) - error_values *= 100.0 - error_values = error_values.mean(axis=1) - - case ErrorMetric.MAPE: - error_values = [] - for suffix in sim_targets.keys(): - true_values = bench_df[metric_col] * value_scale - values = bench_df[metric_col + suffix] * value_scale - error = mape(true_values=true_values, values=values) - bench_df[ - metric_col + "_" + error_metric.name.lower() + suffix - ] = error - error_values.append(error) - error_values = pd.DataFrame(error_values) - error_values *= 100.0 - error_values = error_values.mean(axis=1) - # error_values = error_values.aggregate(scipy.stats.gmean, axis=1) - # .apply(np.exp) - # error_values = pd.DataFrame([ - # abs_err( - # true_values=bench_df[metric_col], - # values=bench_df[metric_col + suffix] - # ) for suffix in sim_targets.keys() - # ]) - # keys = [ - # metric_col + "_" + error_metric.name.lower() + suffix - # for suffix in sim_targets.keys() - # ] - # # print(keys) - # print(error_values.shape) - # bench_df[keys] = 
error_values.to_numpy().ravel() - # error_values = error_values.mean(axis=1) - case _: - raise ValueError( - "unknown error metric {}".format(error_metric.name) - ) - - # assert isinstance(error_values, (np.ndarray, pd.Series)) - for col, suffix in enumerate(sim_targets.keys()): - valid = not np.isnan(bench_df[metric_col + suffix]).all() - if not valid: - error_values[col] = np.nan - - table += r" & {} ".format(error_metric.value) - print(error_metric.name) - print(error_values) - for value in error_values: - table += " & " - if np.isnan(value): - continue - match error_metric: - case ErrorMetric.Correlation: - if value == np.nanmax(error_values): - table += r"\boldmath" - table += "${:5.3f}$".format(value) - # case ErrorMetric.RelErr: - # if value == np.nanmin(error_values): - # table += r"\boldmath" - # table += "${:5.2f}\\%$".format(value) - # case ErrorMetric.MALE: - # if value == np.nanmin(error_values): - # table += r"\boldmath" - # table += "${}\\%$".format( - # plot.human_format_thousands(value) - # ) - # case ErrorMetric.SMAPE: - # if value == np.nanmin(error_values): - # table += r"\boldmath" - # table += "${}\\%$".format( - # plot.human_format_thousands(value) - # ) - case ErrorMetric.SMAPE | ErrorMetric.MAPE: - if value == np.nanmin(error_values): - table += r"\boldmath" - table += "${}\\%$".format( - plot.human_format_thousands(value) - ) - case ErrorMetric.EMALE | ErrorMetric.ERMSLE | ErrorMetric.MAE: - if value == np.nanmin(error_values): - table += r"\boldmath" - if metric_is_percent: - table += "${:5.2f}\\%$".format(value) - else: - table += "${}$".format( - plot.human_format_thousands(value) - ) - - table += r"\\" + "\n" - - # if not accelsim_valid: - # metric_row[0] = np.nan - # if not gpucachesim_valid: - # metric_row[1] = np.nan - # if not gpucachesim_mem_only_valid: - # metric_row[2] = np.nan - # if not gpucachesim_exec_valid: - # metric_row[3] = np.nan - - if bench is not None and verbose: - print( - bench_df[ - # + [sim + "_rel_err" for sim 
in ["accelsim", "gpucachesim"]] - # + [sim + "_rmse" for sim in ["accelsim", "gpucachesim"]] - preview_cols - + [ - metric_col + "_" + error_metric.name.lower() + suffix - for suffix in ["_accelsim", "_gpucachesim"] - ] - # + [sim + "_rpd" for sim in ["accelsim", "gpucachesim"]] - ].fillna(0.0) - ) - # if bench is not None: - table += r" \hline" - table += "\n" - - table += "%\n%\n" +@main.command(name="speed-table") +@click.option("-p", "--path", help="Path to materialized benchmark config") +@click.option("-b", "--bench", "bench_name", help="Benchmark name") +@click.option("--nsight", "nsight", type=bool, is_flag=True, help="use nsight") +@click.option( + "--mean-time", + "include_mean_time", + type=bool, + is_flag=True, + help="include mean time", +) +@click.option( + "-v", "--vebose", "verbose", type=bool, is_flag=True, help="enable verbose output" +) +@click.option("--png", "png", type=bool, is_flag=True, help="convert to png") +def run_speed_table(bench_name, path, nsight, verbose, include_mean_time, png): + profiler = "nsight" if nsight else "nvprof" + selected_df = load_stats(bench_name=bench_name, profiler=profiler, path=path) + gpucachesim.stats.speed_table.speed_table(selected_df, bench_name, include_mean_time=include_mean_time, verbose=verbose, png=png) - print(table) - utils.copy_to_clipboard(table) - print("copied table to clipboard") + -@main.command() +@main.command(name="result-table") # @click.pass_context @click.option("--path", help="Path to materialized benchmark config") -@click.option("--bench", "-b", "bench_name_arg", help="Benchmark name") +@click.option("--bench", "bench_name", help="Benchmark name") +@click.option("--metric", "metric", type=str, help="metric") +@click.option( + "--combined-only", "combined_only", type=bool, is_flag=True, help="only output combined metrics" +) @click.option("--nsight", "nsight", type=bool, is_flag=True, help="use nsight") -def parallel_table(bench_name_arg, path, nsight): +@click.option( + "-v", 
"--vebose", "verbose", type=bool, is_flag=True, help="enable verbose output" +) +@click.option("--png", "png", type=bool, is_flag=True, help="convert to png") +def run_result_table(path, bench_name, metric, combined_only, nsight, verbose, png): profiler = "nsight" if nsight else "nvprof" - all_benchmarks = bench_name_arg is None - selected_df = load_stats(bench_name=bench_name_arg, profiler=profiler, path=path) - - print(selected_df[["target", "run"]].drop_duplicates()) - - # only keep simulation and remove non kernel stats - selected_df = selected_df[selected_df["target"] == Target.Simulate.value] - selected_df = selected_df[~selected_df["kernel_name"].isna()] - # selected_df = sum_per_config_kernel_metrics(selected_df) - selected_df, _ = aggregate_mean_input_config_stats( - selected_df, per_kernel=False, mean=False - ) - - num_benchmarks = len(selected_df["benchmark"].unique().tolist()) - - all_input_cols = copy.deepcopy(benchmarks.ALL_BENCHMARK_INPUT_COLS) - all_input_cols = sorted(list([col for col in all_input_cols if col in selected_df])) - - # bench_cols = copy.deepcopy(benchmarks.BENCH_TARGET_INDEX_COLS) - bench_input_cols = ( - [] - if all_benchmarks - else copy.deepcopy(benchmarks.BENCHMARK_INPUT_COLS[bench_name_arg]) - ) - # bench_input_cols = ( - # list(copy.deepcopy(benchmarks.ALL_BENCHMARK_INPUT_COLS) - set(["input_mode"])) - # if all_benchmarks else copy.deepcopy(benchmarks.BENCHMARK_INPUT_COLS[bench_name_arg]) - # ) - - # get serial - serial = selected_df[selected_df["input_mode"] == "serial"].copy() - - metric_cols = set(serial.columns) - metric_cols -= set([c for c in serial.columns if c.startswith("input_")]) - metric_cols -= set(benchmarks.NON_NUMERIC_COLS) - metric_cols -= set(["exec_time_sec", "run"]) - metric_cols = list(metric_cols) - metric_cols = sorted(metric_cols) - # pprint(metric_cols) - # print(serial.loc[ - # serial["input_id"] == 0, - # # ["cycles", "kernel_launch_id", "stream_id", "run"], - # ["target", "benchmark", "input_id", 
"kernel_name_mangled", "kernel_name", "run"] - # + metric_cols, - # ].T) - - deterministic_group_cols = [ - "target", - "benchmark", - "input_id", - "kernel_launch_id", - "kernel_name_mangled", - "kernel_name", - ] - metric_cols = [col for col in metric_cols if col not in deterministic_group_cols] - - def _inspect_deterministic_metrics(df): - print(df[metric_cols].nunique().T) - pass - - # print(serial.groupby(deterministic_group_cols, dropna=False)[metric_cols].apply(lambda df: print(df.T))) - # print(deterministic_group_cols) - # print(metric_cols) - serial_deterministic_grouped = serial.groupby( - deterministic_group_cols, dropna=False - ) - # serial_deterministic_grouped[serial.columns].apply(_inspect_deterministic_metrics) - unique_simulation_metrics = serial_deterministic_grouped[metric_cols].nunique() - assert (unique_simulation_metrics == 1).all(axis=1).all() - - # parallel - parallel = selected_df[~selected_df["input_mode"].isin([np.nan, "serial"])] - assert "total_cores" in serial - assert "total_cores" in parallel - - print("serial size", serial.shape) - print("parallel size", parallel.shape) - - # those are fully distinct - serial_input_ids = sorted(serial["input_id"].unique().tolist()) - parallel_input_ids = sorted(parallel["input_id"].unique().tolist()) - print("num serial input ids: ", len(serial_input_ids)) - print("num parallel input ids: ", len(parallel_input_ids)) - if len(serial_input_ids) == 0: - raise ValueError("have zero serial benchmark configurations") - if len(parallel_input_ids) == 0: - raise ValueError("have zero parallel benchmark configurations") - - print("serial input ids", serial_input_ids) - print("parallel input ids", parallel_input_ids) - - deterministic = parallel[parallel["input_mode"] == "deterministic"] - assert len(deterministic) > 0 - unique_simulation_metrics = deterministic.groupby( - deterministic_group_cols, - dropna=False, - )[metric_cols].nunique() - - config_with_identical_results = (unique_simulation_metrics == 
1).all(axis=1) - if not config_with_identical_results.all(): - bad_configs = unique_simulation_metrics[ - ~config_with_identical_results - ].reset_index() - # print(bad_configs.T) - bad = deterministic.merge( - bad_configs, - on=deterministic_group_cols, - how="inner", - suffixes=("", "_nunique"), - ) - # print(bad.T) - print(bad[deterministic_group_cols + ["run"] + metric_cols].T) - - assert ( - config_with_identical_results.all() - ), "deterministic configuration results differ for different runs, which makes them rather nondeterministic" - - # non deterministic without interleaving is also deterministic actually - nondeterministic = parallel[parallel["input_mode"] == "nondeterministic"] - # unique_simulation_metrics = nondeterministic.groupby( - # deterministic_group_cols, dropna=False - # )[metric_cols].nunique() - assert len(nondeterministic) > 0 - - input_id_partitoning = set(serial["input_id"].unique()).intersection( - set(parallel["input_id"].unique()) - ) - if len(input_id_partitoning) > 0: - print(color("serial and parallel input ids intersect ", fg="red")) - for input_id in input_id_partitoning: - input_preview_cols = list( - ["input_id"] - + benchmarks.BENCH_TARGET_INDEX_COLS - + ["kernel_launch_id"] - + bench_input_cols - + benchmarks.SIMULATE_INPUT_COLS - ) - - print("serial with input id", input_id) - print(serial.loc[serial["input_id"] == input_id, input_preview_cols]) - print("parallel input", input_id) - print(parallel.loc[parallel["input_id"] == input_id, input_preview_cols]) - break - assert ( - len(input_id_partitoning) == 0 - ), "serial and parallel inputs intersect, this is generally solved by regenerating the aggregated csv stats" - - # join based on input_cols, NOT based on mode - join_cols = list( - benchmarks.BENCH_TARGET_INDEX_COLS - + ["kernel_name", "kernel_launch_id", "run"] - + ( - list( - copy.deepcopy(benchmarks.ALL_BENCHMARK_INPUT_COLS) - set(["input_mode"]) - ) - if all_benchmarks - else 
copy.deepcopy(benchmarks.BENCHMARK_INPUT_COLS[bench_name_arg]) - ) - + benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS - ) - pprint(join_cols) - - pre_join_preview_cols = ["benchmark", "kernel_name", "kernel_launch_id", "run"] - serial_indices = serial[pre_join_preview_cols].drop_duplicates(ignore_index=True) - parallel_indices = parallel[pre_join_preview_cols].drop_duplicates( - ignore_index=True - ) - # print(serial_indices) - # print(parallel_indices) - diff = parallel_indices.compare(serial_indices) - if len(diff) != 0: - print("DIFF START") - print(diff) - print("DIFF END") - assert len(diff) == 0 - - joined = parallel.merge( - serial, - on=join_cols, - how="left", - suffixes=("_parallel", "_serial"), - ) - print( - "joined={} parallel={} serial={}".format( - joined.shape, parallel.shape, serial.shape - ) - ) - - # test = joined["target"] == Target.Simulate.value - # test &= joined["benchmark"] == "vectorAdd" - # test &= joined["kernel_name"] == "vecAdd" - # test &= joined["kernel_launch_id"] == 0 - # test &= joined["run"] == 1 - # test &= joined["input_memory_only"] == False - # test &= joined["input_num_clusters"] == 56 - # test &= joined["input_cores_per_cluster"] == 1 - # pprint(list(joined.columns.tolist())) - # print(joined[test]) - - assert joined.shape[0] == parallel.shape[0] - assert "mean_blocks_per_sm_parallel" in joined - assert "total_cores_parallel" in joined - assert "cores_per_cluster_parallel" in joined - assert set(joined["input_id_serial"].values) == set(serial["input_id"].values) - - if len(joined) == 0: - raise ValueError("joined parallel and serial dataframe is empty") - - preview_metric_cols = ["cycles", "exec_time_sec", "l2_hit_rate", "l1_hit_rate"] - preview_cols = list( - benchmarks.BENCH_TARGET_INDEX_COLS - + ["kernel_name", "kernel_launch_id", "run"] - + ["input_id_serial", "input_id_parallel"] - + bench_input_cols - + [c + "_serial" for c in benchmarks.SIMULATE_EXECUTION_CONFIG_COLS] - + [c + "_parallel" for c in 
benchmarks.SIMULATE_EXECUTION_CONFIG_COLS] - + sorted( - [c + "_serial" for c in preview_metric_cols] - + [c + "_parallel" for c in preview_metric_cols] - ) - ) - # print(joined[preview_cols][:4].T) - - group_cols = sorted( - benchmarks.BENCH_TARGET_INDEX_COLS - + bench_input_cols - + benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS - + [col + "_parallel" for col in benchmarks.SIMULATE_EXECUTION_CONFIG_COLS] - + [col + "_serial" for col in benchmarks.SIMULATE_EXECUTION_CONFIG_COLS] - ) - assert "input_id" not in group_cols - assert "input_id_serial" not in group_cols - - aggregations = { - **{c: "mean" for c in sorted(joined.columns)}, - **{c: agg for c, agg in benchmarks.NON_NUMERIC_COLS.items()}, - **{c + "_parallel": agg for c, agg in benchmarks.NON_NUMERIC_COLS.items()}, - **{c + "_serial": agg for c, agg in benchmarks.NON_NUMERIC_COLS.items()}, - } - aggregations = { - col: agg - for col, agg in aggregations.items() - if col in joined and not col in group_cols - } - # pprint(aggregations) - # pprint(group_cols) - - if set(joined.columns.tolist()) - set(group_cols) != set(aggregations.keys()): - pprint( - (set(joined.columns.tolist()) - set(group_cols)).symmetric_difference( - set(aggregations.keys()) - ) - ) - raise ValueError - - # def add_no_kernel_exec_time(df): - # # print(df[preview_cols].T) - # assert len(df) >= 2, "have no kernel row and at least one kernel for the config" - # valid_kernels = ~df["kernel_name"].isna() - # no_kernel = df[~valid_kernels] - # assert len(no_kernel) == 1 - # num_valid_kernels = valid_kernels.sum() - # assert num_valid_kernels >= 1 - # serial_delta = float(no_kernel["exec_time_sec_serial"].iloc[0]) / num_valid_kernels - # parallel_delta = float(no_kernel["exec_time_sec_parallel"].iloc[0]) / num_valid_kernels - # df.loc[valid_kernels, "exec_time_sec_serial"] += serial_delta - # df.loc[valid_kernels, "exec_time_sec_parallel"] += parallel_delta - # return df - # - # joined = joined.groupby( - # group_cols + ["run"], 
dropna=False).apply( - # add_no_kernel_exec_time).reset_index(drop=True) - - # # remove non kernel stats - # grouped = joined[~joined["kernel_name"].isna()].groupby(group_cols, dropna=False) - grouped = joined.groupby(group_cols, dropna=False) - - # this is just for checking things - def _inspect(df): - # print(df) - # print(df.columns) - # print(df.index) - if not all_benchmarks: - assert len(df["input_id_serial"].unique()) == 1 - # print("num runs", len(df["run"].unique())) - pass - - grouped[joined.columns].apply(_inspect) - - aggregated = grouped.agg(aggregations, squeeze=False) - - # speedup - def compute_speedup(df): - # only count speedup for large enough inputs - exec_time_sec_serial = df["exec_time_sec_serial"] - exec_time_sec_parallel = df["exec_time_sec_parallel"] - exec_time_sec_parallel = df[ - ["exec_time_sec_serial", "exec_time_sec_parallel"] - ].min(axis=1) - return speedup( - baseline=exec_time_sec_serial, values=exec_time_sec_parallel - ).mean() - - aggregated["exec_time_sec_speedup"] = grouped[joined.columns].apply(compute_speedup) - - # cycles error - aggregated["cycles_mape"] = grouped[joined.columns].apply( - lambda df: mape( - true_values=df["cycles_serial"], values=df["cycles_parallel"] - ).mean() - ) - # l1 hit rate error - aggregated["l1_hit_rate_mae"] = grouped[joined.columns].apply( - lambda df: abs_err( - true_values=df["l1_hit_rate_serial"], values=df["l1_hit_rate_parallel"] - ).mean() - ) - # # l2 hit rate error - aggregated["l2_hit_rate_mae"] = grouped[joined.columns].apply( - lambda df: abs_err( - true_values=df["l2_hit_rate_serial"], values=df["l2_hit_rate_parallel"] - ).mean() - ) - # dram reads error - aggregated["dram_reads_smape"] = grouped[joined.columns].apply( - lambda df: smape( - true_values=df["dram_reads_serial"], values=df["dram_reads_parallel"] - ) # .mean() - ) - # dram writes error - aggregated["dram_writes_smape"] = grouped[joined.columns].apply( - lambda df: smape( - true_values=df["dram_writes_serial"], 
values=df["dram_writes_parallel"] - ) # .mean() - ) - - # print(aggregated[[ - # "target", - # "benchmark", - # "input_variant", - # "dram_reads_serial", - # "dram_reads_parallel", - # "dram_reads_rel_err", - # "dram_writes_serial", - # "dram_writes_parallel", - # "dram_writes_rel_err", - # ]]) - - aggregated = aggregated.reset_index() - print( - aggregated.loc[ - # 500_000 vectoradd - aggregated["input_id_serial"] == 210.0, - preview_cols - + [ - "cycles_mape", - "dram_reads_smape", - "dram_writes_smape", - "exec_time_sec_speedup", - ], - ][0:4].T.drop_duplicates() - ) - - # build the table data - assert 8 * benchmarks.BASELINE["num_clusters"] == 224 - - functional_configs: typing.Sequence[typing.Dict[str, typing.Any]] = [ - dict( - input_memory_only=False, - input_num_clusters=benchmarks.BASELINE["num_clusters"], - input_cores_per_cluster=1, - ), - dict( - input_memory_only=False, - input_num_clusters=benchmarks.BASELINE["num_clusters"], - input_cores_per_cluster=4, - # input_num_clusters=4*benchmarks.BASELINE["num_clusters"], - # input_cores_per_cluster=1, - ), - ] - selected_benchmarks: typing.Sequence[typing.Dict[str, typing.Any]] = [] - for functional_config in functional_configs: - selected_benchmarks += [ - dict( - name="vectorAdd", - inputs={ - **{"input_dtype": 32, "input_length": 500_000}, - **functional_config, - }, - ), - dict( - name="babelstream", - inputs={ - **{"input_size": 102400}, - **functional_config, - }, - ), - dict( - name="transpose", - inputs={ - # **{"input_variant": "naive", "input_dim": 512}, - **{"input_variant": "coalesced", "input_dim": 512}, - **functional_config, - }, - ), - dict( - name="matrixmul", - inputs={ - **{"input_dtype": 32, "input_rows": 512}, - **functional_config, - }, - ), - dict( - name="simple_matrixmul", - inputs={ - **{ - "input_dtype": 32, - "input_m": 512, - "input_n": 32, - "input_p": 512, - }, - **functional_config, - }, - ), - ] - - def compute_table_row_label(bench_config, df): - benchmark = df["benchmark"] 
- bench_input_cols = copy.deepcopy(benchmarks.BENCHMARK_INPUT_COLS[benchmark]) - assert all([c in df for c in bench_input_cols]) - - assert ( - df[["total_cores_parallel"]].values == df[["total_cores_serial"]].values - ).all() - - assert len(df[["input_cores_per_cluster"]].value_counts()) == 1 - assert len(df[["input_num_clusters"]].value_counts()) == 1 - assert len(df[["total_cores_parallel"]].value_counts()) == 1 - - cores_per_cluster = int(df[["input_cores_per_cluster"]].values[0]) - num_clusters = int(df[["input_num_clusters"]].values[0]) - total_cores = num_clusters * cores_per_cluster - - assert bench_config["inputs"]["input_cores_per_cluster"] == cores_per_cluster - assert bench_config["inputs"]["input_num_clusters"] == num_clusters - print( - df[ - [ - "benchmark", - "input_cores_per_cluster", - "input_num_clusters", - "total_cores_parallel", - ] - ] - ) - assert total_cores == int(df[["total_cores_parallel"]].values[0]) - - match benchmark.lower(): - case "vectoradd": - label = "VectorAdd (f{:<2}, {})".format( - int(df["input_dtype"]), - int(df["input_length"]), - ) - case "matrixmul": - label = "MatrixMul (f{:<2}, {}x{}x{})".format( - int(df["input_dtype"]), - int(df["input_rows"]), - int(df["input_rows"]), - int(df["input_rows"]), - ) - case "simple_matrixmul": - label = "Naive MatrixMul (f{:<2}, {}x{}x{})".format( - int(df["input_dtype"]), - int(df["input_m"]), - int(df["input_n"]), - int(df["input_p"]), - ) - case "transpose": - label = "Transpose ({}, {}x{})".format( - df["input_variant"], - int(df["input_dim"]), - int(df["input_dim"]), - ) - case "babelstream": - label = "BabelStream ({})".format(int(df["input_size"])) - case other: - label = str(other) - - label += " @ {} SM's [{:.2f} CTA/SM]".format( - int(df["total_cores_parallel"]), - float(df["mean_blocks_per_sm_parallel"]), - ) - return label - - def write_table_row(row, bold_values=None): - if bold_values is None: - bold_values = set() - - def bold(v, formatted_v): - if v in bold_values: - 
formatted_v = formatted_v.strip() - is_math = formatted_v[0] == "$" and formatted_v[-1] == "$" - if is_math: - return r"\boldmath" + str(formatted_v) - else: - return r"\textbf{" + str(formatted_v) + "}" - return str(formatted_v) - - is_first_metric_row = row.threads == 4 - is_last_metric_row = row.threads == 8 - - table_row = "" - - # metric name - if is_first_metric_row: - table_row += r"\multirow{2}{*}{\shortstack[r]{" + str(row.metric) + r"}}" - - # threads - table_row += r" & $t=" + str(row.threads) + r"$ " - - # serial value - if row.serial_value is not None and is_first_metric_row: - table_row += ( - r" & \multirow{2}{*}{\shortstack[r]{" - + bold(row.serial_value[0], row.serial_value[1]) - + r"}} " - ) - else: - table_row += r" & " - - # deterministic value - if row.det_value is not None: - table_row += r" & " + bold(row.det_value[0], row.det_value[1]) - else: - table_row += r" & " - - # nondeterministic value - for nondet_value, formatted_nondet_value in row.nondet_values: - table_row += r" & " + bold(nondet_value, formatted_nondet_value) - table_row += r" \\ " - if is_last_metric_row: - table_row += r" \hline " - table_row += "\n" - return table_row - - table = r""" -{\renewcommand{\arraystretch}{1.5}% -\begin{tabularx}{\textwidth}{zs|s|z|zz} -& & \multicolumn{1}{c|}{Serial} & \multicolumn{1}{c|}{Deterministic} & \multicolumn{2}{c}{Nondeterministic} \\ -& & & & \multicolumn{1}{c}{$n=5$} & \multicolumn{1}{c}{$n=10$} \\ \hline -""" - - # absolute_exec_time = not all_benchmarks - - if all_benchmarks: - for functional_config in functional_configs: - mask_cols = list(functional_config.keys()) - mask_values = list(functional_config.values()) - mask = (aggregated[mask_cols] == mask_values).all(axis=1) - - total_cores = int(aggregated.loc[mask, "total_cores_parallel"].values[0]) - label = "Average @ {} SM's".format(total_cores) - - table += "%\n%\n" - table += ( - r"\rowcolor{gray!10} \multicolumn{6}{c}{\textbf{" - + label - + r"}} \\ \hline" - + "\n" - ) - - 
print("=> functional config: {}".format(functional_config)) - - num_bench_configs = num_benchmarks # todo - table_rows: typing.Sequence[ParallelTableRow] = build_parallel_table_rows( - aggregated[mask], - num_bench_configs=num_bench_configs, - # all_benchmarks=True - ) + selected_df = load_stats(bench_name=bench_name, profiler=profiler, path=path) + gpucachesim.stats.result_table.result_table(selected_df, bench_name=bench_name, metrics=[metric], combined_only=combined_only, verbose=verbose, png=png) - table += "%\n%\n" - - table_rows = sorted(table_rows, key=lambda row: (row.metric, row.threads)) - for row in table_rows: - bold_values = [] - if row.metric == r"exec\\time": - bold_values = [np.amax(row.values())] - # bold_values = [np.amin(row.values())] - # if absolute_exec_time: - # # when exec time is absolute, take minimum - # bold_values = [np.amin(row.values())] - # else: - # # when exec time is speedup, take maximum - # bold_values = [np.amax(row.values())] - print(row.metric, bold_values, row.values()) - table += write_table_row(row, bold_values) +@main.command(name="all-result-table") +# @click.pass_context +@click.option("--path", help="Path to materialized benchmark config") +@click.option("--bench", "bench_name", help="Benchmark name") +@click.option("--metric", "metric", type=str, help="metric") +@click.option("--nsight", "nsight", type=bool, is_flag=True, help="use nsight") +@click.option( + "-v", "--vebose", "verbose", type=bool, is_flag=True, help="enable verbose output" +) +@click.option("--png", "png", type=bool, is_flag=True, help="convert to png") +def all_result_table(path, bench_name, metric, nsight, verbose, png): + profiler = "nsight" if nsight else "nvprof" + selected_df = load_stats(bench_name=None, profiler=profiler, path=path) + all_benches = sorted(list(selected_df["benchmark"].unique())) + if metric is None: + metrics = ["dramreads", "dramwrites", "l2accesses", "l2dhitrate", "l1accesses", "l1dhitrate", "cycles"] else: - for bench_config 
in selected_benchmarks: - bench_inputs: typing.Dict[str, typing.Any] = bench_config["inputs"] - if not all(aggregated["benchmark"] == bench_config["name"]): - # print( - # "SKIP: want {} (have {})".format( - # aggregated["benchmark"][0], bench_config["name"] - # ) - # ) - continue + metrics = [metric] - print("") - print( - color("==> {} {}".format(bench_config["name"], bench_inputs), fg="cyan") - ) + options = dict(verbose=verbose, png=png) - mask_cols = ["benchmark"] + list(bench_inputs.keys()) - mask_values = [bench_name_arg] + list(bench_inputs.values()) - - mask = (aggregated[mask_cols] == mask_values).all(axis=1) - # test_df = aggregated.loc[ - # mask, - # benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS - # + bench_input_cols - # + ["mean_blocks_per_sm_parallel"], - # ] - # test_df = test_df.drop_duplicates() - # print(test_df) - # assert len(test_df) == 1 - - table += "%\n%\n" - label = str( - compute_table_row_label(bench_config, aggregated.loc[mask].iloc[0]) - ) - table += ( - r"\rowcolor{gray!10} \multicolumn{6}{c}{\textbf{" - + label - + r"}} \\ \hline" - + "\n" - ) + for bench_name in all_benches: + gpucachesim.stats.result_table.result_table(selected_df.copy(), bench_name=bench_name, metrics=metrics, **options) - table_rows: typing.Sequence[ParallelTableRow] = build_parallel_table_rows( - aggregated[mask], - num_bench_configs=1, # all_benchmarks=False - ) + for combined_only in [True, False]: + gpucachesim.stats.result_table.result_table(selected_df.copy(), bench_name=None, metrics=metrics, combined_only=combined_only, **options) - table += "%\n%\n" - - table_rows = sorted(table_rows, key=lambda row: (row.metric, row.threads)) - for row in table_rows: - bold_values = [] - if row.metric == r"exec\\time": - bold_values = [np.amin(row.values())] - # if absolute_exec_time: - # bold_values = [np.amin(row.values())] - # else: - # bold_values = [np.amax(row.values())] - print( - "writing table row {:<30} values={} bold={}".format( - row.metric, row.values(), 
bold_values - ) - ) - table += write_table_row(row, bold_values) - # add averaged row - for functional_config in functional_configs: - mask_cols = list(functional_config.keys()) - mask_values = list(functional_config.values()) - mask = (aggregated[mask_cols] == mask_values).all(axis=1) + - total_cores = int(aggregated.loc[mask, "total_cores_parallel"].values[0]) +@main.command(name="parallel-table") +# @click.pass_context +@click.option("--path", help="Path to materialized benchmark config") +@click.option("--bench", "-b", "bench_name", help="Benchmark name") +@click.option("--nsight", "nsight", type=bool, is_flag=True, help="use nsight") +@click.option( + "--scale-clusters", + "scale_clusters", + type=bool, + default=True, + help="scale clusters instead of cores per cluster", +) +@click.option( + "--large", + "large", + type=bool, + is_flag=True, + help="only consider large inputs when computing the average speedup", +) +@click.option( + "--verbose", + "verbose", + type=bool, + default=True, + help="verbose output", +) +@click.option("--png", "png", type=bool, is_flag=True, help="convert to png") +def run_parallel_table(bench_name, path, nsight, scale_clusters, large, verbose, png): + profiler = "nsight" if nsight else "nvprof" + selected_df = load_stats(bench_name=bench_name, profiler=profiler, path=path) + gpucachesim.stats.parallel_table.parallel_table( + selected_df, bench_name=bench_name, scale_clusters=scale_clusters, large=large, verbose=verbose, png=png) - print( - color( - "==> AVERAGE for {:<4} SM's {}".format( - total_cores, functional_config - ), - fg="cyan", - ) - ) - label = "Average @ {} SM's".format(total_cores) +@main.command(name="all-parallel-table") +@click.option("--path", help="Path to materialized benchmark config") +@click.option("--nsight", "nsight", type=bool, is_flag=True, help="use nsight") +@click.option("--png", "png", type=bool, is_flag=True, help="convert to png") +def run_all_parallel_table(path, nsight, png): + profiler = "nsight" 
if nsight else "nvprof" + selected_df = load_stats( + bench_name=None, profiler=profiler, path=path) - table += "%\n%\n" - table += ( - r"\rowcolor{gray!10} \multicolumn{6}{c}{\textbf{" - + label - + r"}} \\ \hline" - + "\n" - ) + bench_names = sorted(list(selected_df["benchmark"].unique())) + configs = list(itertools.product([True, False], [True, False])) - assert num_benchmarks == 1 - num_configs = len(aggregated.loc[mask, all_input_cols].drop_duplicates()) - table_rows: typing.Sequence[ParallelTableRow] = build_parallel_table_rows( - aggregated[mask], - num_bench_configs=num_configs, # all_benchmarks=True - ) - table += "%\n%\n" - - table_rows = sorted(table_rows, key=lambda row: (row.metric, row.threads)) - for row in table_rows: - bold_values = [] - if row.metric == r"exec\\time": - # if absolute_exec_time: - # # when exec time is absolute, take minimum - # bold_values = [np.amin(row.values())] - # else: - # # when exec time is speedup, take maximum - bold_values = [np.amax(row.values())] - - # print(row.metric, bold_values, row.values()) - print( - "writing table row {:<30} values={} bold={}".format( - row.metric, row.values(), bold_values - ) - ) - table += write_table_row(row, bold_values) + total = len(bench_names) * len(configs) - table += r""" -\end{tabularx}} -\end{table} -""" - print(table) - utils.copy_to_clipboard(table) - print("copied table to clipboard") + options = dict(batch=True, verbose=False, png=png) + done = 0 + for (scale_clusters, large) in configs: + print("========= {:>4}/{:<4} =======".format(done, total)) + gpucachesim.stats.parallel_table.parallel_table( + selected_df, bench_name=None, scale_clusters=scale_clusters, large=large, **options) -def load_stats(bench_name, profiler="nvprof", path=None) -> pd.DataFrame: - stats = [] - if bench_name is not None: - stats_file = REPO_ROOT_DIR / "results/combined.stats.{}.{}.csv".format( - profiler, bench_name - ) - print("loading {}".format(stats_file)) - df = pd.read_csv(stats_file, header=0) 
- if len(df) < 1: - print(color("WARNING: {} is empty!".format(stats_file), fg="red")) - else: - stats.append(df) - else: - b = Benchmarks(path) - benches = utils.flatten(list(b.benchmarks[Target.Profile.value].values())) - bench_names = set([b["name"] for b in benches]) - for name in bench_names: - stats_file = REPO_ROOT_DIR / "results/combined.stats.{}.{}.csv".format( - profiler, name + # for all benchmarks + for bench_name in bench_names: + mask = selected_df["benchmark"] == bench_name + bench_df = selected_df[mask].copy() + gpucachesim.stats.parallel_table.parallel_table( + bench_df, bench_name=bench_name, + scale_clusters=scale_clusters, + **options, ) - print("loading {}".format(stats_file)) - df = pd.read_csv(stats_file, header=0) - if len(df) < 1: - print(color("WARNING: {} is empty!".format(stats_file), fg="red")) - else: - stats.append(df) - - stats_df = pd.concat(stats, ignore_index=False) - stats_df = stats_df.sort_values(["benchmark", "target"]) - if bench_name is not None: - if isinstance(bench_name, str): - bench_names = [bench_name] - elif isinstance(bench_name, list): - bench_names = bench_name - else: - raise ValueError - stats_df = stats_df[stats_df["benchmark"].isin(bench_names)] - - # special_dtypes = { - # # **{col: "float64" for col in stats_df.columns}, - # # **{col: "object" for col in benchmarks.NON_NUMERIC_COLS.keys()}, - # "target": "str", - # "benchmark": "str", - # "Host Name": "str", - # "Process Name": "str", - # "device": "str", - # "context_id": "float", - # "is_release_build": "bool", - # "kernel_function_signature": "str", - # "kernel_name": "str", - # "kernel_name_mangled": "str", - # "input_id": "float", - # # "input_memory_only": "first", - # # "input_mode": "first", - # # makes no sense to aggregate - # "cores_per_cluster": "float", - # "num_clusters": "float", - # "total_cores": "float", - # "input_memory_only": "bool", - # "input_num_clusters": "float", - # "input_cores_per_cluster": "float", - # "input_mode": "str", - # 
"input_threads": "float", - # "input_run_ahead": "float", - # } - # missing_dtypes = set(benchmarks.NON_NUMERIC_COLS.keys()) - set(special_dtypes.keys()) - # assert len(missing_dtypes) == 0, "missing dtypes for {}".format(missing_dtypes) - - dtypes = { - **{col: "float64" for col in stats_df.columns}, - **benchmarks.SPECIAL_DTYPES, - } - # raise ValueError("test") - dtypes = {col: dtype for col, dtype in dtypes.items() if col in stats_df} - stats_df = stats_df.astype(dtypes) - - simulation_targets = [ - Target.Simulate.value, - Target.AccelsimSimulate.value, - Target.PlaygroundSimulate.value, - ] - simulation_targets_df = stats_df[stats_df["target"].isin(simulation_targets)] - if not (simulation_targets_df["is_release_build"] == True).all(): - print(color("WARNING: non release results:", fg="red")) - non_release_results = simulation_targets_df[ - simulation_targets_df["is_release_build"] == True - ] - grouped = non_release_results.groupby(["benchmark", "target"]) - print(grouped["input_id"].count()) - print("====") - - non_float_cols = set( - [ - col - for col, dtype in benchmarks.SPECIAL_DTYPES.items() - if dtype not in ["float", "float64", "int", "int64"] - ] - ) - nan_dtype = pd.NA - fill = { - **{col: 0.0 for col in stats_df.columns}, - **{col: nan_dtype for col in non_float_cols}, - **{ - "kernel_name_mangled": nan_dtype, - "kernel_name": nan_dtype, - "device": nan_dtype, - # test this out - "kernel_launch_id": nan_dtype, - "run": nan_dtype, - }, - **{c: nan_dtype for c in benchmarks.ALL_BENCHMARK_INPUT_COLS}, - **{c: nan_dtype for c in benchmarks.SIMULATE_INPUT_COLS}, - **{ - "input_memory_only": False, - "input_num_clusters": 28, - "input_cores_per_cluster": 1, - }, - } - assert pd.isnull(fill["kernel_launch_id"]) - assert pd.isnull(fill["kernel_name"]) - # fill = { - # col: dtype for col, dtype in fill.items() - # if col not in benchmarks.CATEGORICAL_COLS - # } - - stats_df = stats_df.fillna(fill).infer_objects(copy=False) - assert 
stats_df["run"].isna().sum() == 0 - - def add_no_kernel_exec_time(df): - # print(df[benchmarks.PREVIEW_COLS][:4].T) - try: - before = copy.deepcopy(df.dtypes) - if df["target"].iloc[0] != Target.Simulate.value: - return df - - assert ( - len(df) >= 2 - ), "expected at least two rows: a no kernel row and at least one kernel for the config" - # print("df") - # print(df) - valid_kernels = ~df["kernel_name"].isna() - # print("valid_kernels") - # print(valid_kernels) - no_kernel = df[~valid_kernels] - # print("no kernel") - # print(no_kernel) - assert len(no_kernel) == 1 - num_valid_kernels = valid_kernels.sum() - assert num_valid_kernels >= 1 - delta = float(no_kernel["exec_time_sec"].iloc[0]) / num_valid_kernels - df.loc[valid_kernels, "exec_time_sec"] += delta - assert (df.dtypes == before).all() - return df - except Exception as e: - print(e) - return str(e) - - group_cols = list( - benchmarks.BENCH_TARGET_INDEX_COLS - + list(benchmarks.ALL_BENCHMARK_INPUT_COLS) - + benchmarks.SIMULATE_INPUT_COLS - + ["run"] - ) - print(len(stats_df)) - group_cols = [col for col in group_cols if col in stats_df] - # pprint(group_cols) - # pprint(stats_df[group_cols].dtypes) - # stats_df = stats_df.fillna({'target': "", "benchmark": "", "input_mode": ""}) - grouped = stats_df.groupby(group_cols, dropna=False) - # grouped = grouped[stats_df.columns].fillna({'target': "", "benchmark": "", "input_mode": ""}) - # print(grouped.isna()) - # raise ValueError("grouped") - stats_df = grouped[stats_df.columns].apply(add_no_kernel_exec_time) - stats_df = stats_df.reset_index(drop=True) - # raise ValueError("its over") - - assert stats_df["run"].isna().sum() == 0 - # assert stats_df["kernel_launch_id"].isna().sum() == 0 - assert stats_df["num_clusters"].isna().sum() == 0 - return stats_df + done += 1 @main.command() @@ -3935,7 +1093,23 @@ def _inspect(df): print(" === {} === ".format(profiler)) assert len(per_config_pivoted) > 0 - print(per_config_pivoted[stat_cols].T) + 
preview_per_config_pivoted = per_config_pivoted.T.copy() + preview_target_name = { + Target.Simulate.value.lower(): "Ours", + Target.AccelsimSimulate.value.lower(): "Accel", + Target.PlaygroundSimulate.value.lower(): "Play", + Target.Profile.value.lower(): "Native", + } + print(preview_per_config_pivoted.index) + preview_per_config_pivoted.index = preview_per_config_pivoted.index.set_levels( + [ + preview_target_name[target.lower()] + for target in preview_per_config_pivoted.index.levels[1].values + ], + level=1, + ) + print(preview_per_config_pivoted.index) + print(preview_per_config_pivoted.loc[pd.IndexSlice[stat_cols, :], :]) def build_per_config_table(df): assert len(df) > 0 @@ -4104,16 +1278,18 @@ def dedup_and_count(l): selected_table_benchmarks = [ # babelstream pd.DataFrame.from_records( - [ - ("babelstream", 10240.0), - ("babelstream", 102400.0), - ] - if per_kernel - else [ - ("babelstream", 1024.0), - ("babelstream", 10240.0), - ("babelstream", 102400.0), - ], + ( + [ + ("babelstream", 10240.0), + ("babelstream", 102400.0), + ] + if per_kernel + else [ + ("babelstream", 1024.0), + ("babelstream", 10240.0), + ("babelstream", 102400.0), + ] + ), columns=["benchmark", "input_size"], ), # transpose @@ -4131,14 +1307,15 @@ def dedup_and_count(l): # simple matrixmul pd.DataFrame.from_records( [ - ("simple_matrixmul", 32, 32, 32), - ("simple_matrixmul", 128, 128, 128), - ("simple_matrixmul", 32, 64, 128), - ("simple_matrixmul", 128, 32, 32), - ("simple_matrixmul", 128, 512, 128), - ("simple_matrixmul", 512, 32, 512), + ("simple_matrixmul", 32, 32, 32, 32), + ("simple_matrixmul", 32, 128, 128, 128), + ("simple_matrixmul", 32, 32, 64, 128), + ("simple_matrixmul", 32, 128, 32, 32), + # extra configs + ("simple_matrixmul", 32, 128, 512, 128), + ("simple_matrixmul", 32, 512, 32, 512), ], - columns=["benchmark", "input_m", "input_n", "input_p"], + columns=["benchmark", "input_dtype", "input_m", "input_n", "input_p"], ), # matrixmul pd.DataFrame.from_records( @@ 
-4211,16 +1388,16 @@ def dedup_and_count(l): per_config.loc[:, "label"] = per_config.apply( partial(compute_label_for_benchmark_df, per_kernel=per_kernel), axis=1 ) - per_config.loc[ - per_config["target"] == Target.Simulate.value, "target_name" - ] = "gpucachesim" + per_config.loc[per_config["target"] == Target.Simulate.value, "target_name"] = ( + "gpucachesim" + ) per_config.loc[ per_config["target"] == Target.AccelsimSimulate.value, "target_name" ] = "AccelSim" - per_config.loc[ - per_config["target"] == Target.Profile.value, "target_name" - ] = per_config.loc[~per_config["device"].isna(), "device"].apply( - gpucachesim.stats.native.normalize_nvprof_device_name + per_config.loc[per_config["target"] == Target.Profile.value, "target_name"] = ( + per_config.loc[~per_config["device"].isna(), "device"].apply( + gpucachesim.stats.native.normalize_nvprof_device_name + ) ) # targets = sorted(per_config["target"].unique().tolist()) @@ -4955,6 +2132,7 @@ def stderr(df): "mean_micros", "mean_millis", "exec_time_sec", + # "total_cores", ] ].agg(["min", "max", "mean", "median", "std", "sem", stderr]) @@ -4971,15 +2149,30 @@ def stderr(df): # pd.options.display.float_format = "{:.2f}".format # print(averaged["total"]) - def compute_ahmdahl_speedup(p, s=None): - if s is None: - s = 1 - p - return 1 / ((1 - p) + p / s) + def compute_gustafson_speedup(p, n): + s = 1 - p + assert 1 + (n - 1) * p == s + p * n + return 1 + (n - 1) * p + + def compute_amdahl_speedup(p, n): + """p is the fraction of parallelizeable work. n is the speedup of that parallel part, i.e. 
number of processors.""" + return 1 / ((1 - p) + p / n) + + threads = 8 + parallel_frac = float(averaged.loc["cycle::core", ("share", "median")]) + amdahl_speedup = compute_amdahl_speedup(p=parallel_frac, n=threads) + print( + "AMDAHL SPEEDUP = {:>6.3f}x for {:>2} threads (p={:>5.2f})".format( + amdahl_speedup, threads, parallel_frac + ) + ) - ahmdahl_speedup = compute_ahmdahl_speedup( - p=averaged.loc["cycle::core", ("share", "median")] + gustafson_speedup = compute_gustafson_speedup(p=parallel_frac, n=threads) + print( + "GUSTAFSON SPEEDUP = {:>6.3f}x for {:>2} threads (p={:>5.2f})".format( + gustafson_speedup, threads, parallel_frac + ) ) - print("AHMDAHL SPEEDUP = {:<6.3f}".format(ahmdahl_speedup)) print("\n\n=== MEAN MICROSECONS") pd.options.display.float_format = "{:.6f}".format @@ -4998,27 +2191,6 @@ def compute_ahmdahl_speedup(p, s=None): print(total_cycle_share, computed_total_cycle_share.sum()) assert computed_total_cycle_share.sum() <= total_cycle_share - # issue blocks = cycle::issue_block_to_core - # cores = cycle::core - # dram = cycle::dram - # interconn = cycle::subpartitions, cycle::interconn - # cache cycle = cycle::l2 - - # timings_df["rel_err"] = timings_df["total"] / timings_df["exec_time_sec"] - # timings_df["abs_err"] = (timings_df["total"] - timings_df["exec_time_sec"]).abs() - - # double check the total duration reported in the timings matches the total exec time sec - # valid_rel = computed_total_cycle_share.sum() / timings_df["exec_time_sec"] <= 0.2 - # valid_abs = (total - exec_time_sec).abs() <= 0.1 - - # recipe = ["375 g flour", - # "75 g sugar", - # "250 g butter", - # "300 g berries"] - # - # data = [float(x.split()[0]) for x in recipe] - # ingredients = [x.split()[-1] for x in recipe] - unit = "mean_micros" agg = "median" idx = pd.MultiIndex.from_product((["share", unit], [agg, "std"])) @@ -5035,52 +2207,15 @@ def compute_ahmdahl_speedup(p, s=None): shares.loc["other", ("share", agg)] = other print(shares) - # num_sections = 
len(shares) - # colors = list(cmap(np.linspace(0, 1.0, num_sections))) - # for i, label in enumerate(shares.index): - # if label.lower() == "other": - # colors[i] = "whitesmoke" - - # colors = [cmap(i) for i in range(len(data))] - # print(colors) - # colors = [matplotlib.colors.to_hex(cmap(i)) for i in range(len(data))] - - # values = shares["share", agg].values * 100.0 - # bottom = 0 - # bars = [] - # for i in range(len(values)): - # bar_color = colors[i] - # share = values[i] - # label = shares.index[i] - # - # bar = plt.bar(0, share, bottom=bottom, color=bar_color, label=label) - # bottom += values[i] - # bars.append(bar) - # - # ax.set_ylim(0, 100.0) - # - # ax.legend(bars, shares.index, - # loc="center left", - # bbox_to_anchor=(1, 0, 0.5, 1)) - - # ax.set_yticks(y_pos, labels=people) - # ax.invert_yaxis() # labels read top-to-bottom - # ax.set_xlabel('Mean percentage of total simulation time') - values = shares["share", agg].values * 100.0 wedges, texts, autotexts = ax.pie( values, - # autopct=labels, # labels=shares.index, # autopct=compute_label, autopct="", - # autopct=lambda pct: func(pct, data), colors=[colors[s] for s in shares.index], - # pctdistance=1.1, # labeldistance=1.2, pctdistance=1.0, - # pctdistance=1.1, - # labeldistance=1.1, ) # textprops=dict(color="w")) @@ -5230,13 +2365,11 @@ def is_baseline(config): timing_df["target"] = bench_config["target"] timing_df["run"] = r - # print(sim_df[gpucachesim.stats.stats.INDEX_COLS + ["num_blocks"]]) - # print(grouped_sim["num_blocks"].head()) + timing_df["total_cores"] = total_cores timing_df["mean_blocks_per_sm"] = ( grouped_sim_excluding_no_kernel["num_blocks"].mean().mean() / total_cores ) - # grouped_sim["mean_blocks_per_sm"].mean().mean() timing_df["exec_time_sec"] = ( grouped_sim_including_no_kernel["elapsed_millis"].sum().sum() ) @@ -5254,7 +2387,6 @@ def is_baseline(config): def compute_exec_time_sec(df) -> float: time = df.loc[TIMING_COLS_SUMMING_TO_FULL_CYCLE, "total_sec"].sum() - # time = 
df.loc[pd.IndexSlice[,:,:,cols_summing_to_full_cycle], "total"].sum() return time computed_exec_time_sec = ( @@ -5304,12 +2436,6 @@ def compute_exec_time_sec(df) -> float: 0.8 * plot.DINA4_WIDTH_INCHES, 0.2 * plot.DINA4_HEIGHT_INCHES, ) - # fig = plt.figure( - # figsize=figsize, - # # layout="constrained", - # # subplot_kw=dict(aspect="equal"), - # ) - # ax = plt.axes() fig, (ax1, ax2) = plt.subplots(1, 2, figsize=figsize, sharex=True, sharey=True) @@ -5341,17 +2467,11 @@ def compute_exec_time_sec(df) -> float: args = dict(sections=sections, colors=colors, validate=validate) - print("=============== CTA/block <= 1 =============") - title = r"$N_{\text{CTA}}$/core $\leq 1$" - title += "\n({} benchmark samples)".format(len(timings_df)) - - # print(timings_df.groupby(index_cols)["total"]) - # total_sec = timings_df.groupby(index_cols)["total_sec"].first().median() - # total_sec = timings_df.loc["cycle::total", "total_sec"].groupby(index_cols)["total_sec"].first().median() - # total_micros = total_sec * 1e6 - # assert 1e6 == 1000_000 - # print(total) - # .loc[("share", "mean_micros")].T["cycle::total"] + print("=============== blocks/core <= 1 =============") + title = r"$N_{\text{blocks}}$/SM $\leq 1$" + samples = len(timings_df[index_cols].drop_duplicates()) + title += "\n({} benchmark configurations)".format(samples) + total_micros = ( timings_df.loc["cycle::total", :] .groupby(index_cols)["mean_micros"] @@ -5364,9 +2484,10 @@ def compute_exec_time_sec(df) -> float: ax1, timings_df, title=title, **args ) - print("=============== CTA/block > 1 =============") - title = r"$N_{\text{CTA}}$/core $>1$" - title += "\n({} benchmark samples)".format(len(sufficient_size_timings_df)) + print("=============== blocks/core > 1 =============") + title = r"$N_{\text{blocks}}$/SM $>1$" + samples = len(sufficient_size_timings_df[index_cols].drop_duplicates()) + title += "\n({} benchmark configurations)".format(samples) total_micros = ( sufficient_size_timings_df.loc["cycle::total", 
:] @@ -5380,48 +2501,19 @@ def compute_exec_time_sec(df) -> float: ax2, sufficient_size_timings_df, title=title, **args ) - # labels = sections - # labels = shares.index - # labels = [r"{} (${:4.1f}\%$)".format(label, values[i]) - # for i, label in enumerate(shares.index)] - - # labels = [label.removeprefix("cycle::").replace("_", " ").capitalize() for label in shares.index] - # legend = ax.legend(wedges, labels, - # # title="Ingredients", - # loc="center left", - # bbox_to_anchor=(1, 0, 0.5, 1)) - # - # bbox_extra_artists.append(legend) - - # handles, labels = ax2.get_legend_handles_labels() - # print("legend", handles) - # handles, labels = plt.gca().get_legend_handles_labels() - # print("legend", handles) - - # handles_labels = [ax.get_legend_handles_labels() for ax in fig.axes] - # handles, labels = [sum(lol, []) for lol in zip(*handles_labels)] - # print(handles, labels) - # print("legend", handles) - - # handles = wedges1 - # print(handles) - # labels = sections - # fig.legend(wedges1 + wedges2, labels1 + labels2, - # fig.legend([wedges1, wedges2], [labels1, labels2], handles = wedges1 + wedges2 labels = labels1 + labels2 unique = [ (h, l) for i, (h, l) in enumerate(zip(handles, labels)) if l not in labels[:i] ] - # fig.legend(wedges1 + wedges2, legend = fig.legend( *zip(*unique), - # title="Ingredients", loc="center left", - # loc="lower left", - # loc="center right", bbox_to_anchor=(1.0, 0.5), - # bbox_to_anchor=(1, 0, 0.5, 1), + edgecolor="none", + frameon=False, + fancybox=False, + shadow=False, ) bbox_extra_artists = [legend] diff --git a/gpucachesim/stats/agg.py b/gpucachesim/stats/agg.py new file mode 100644 index 00000000..780f2f47 --- /dev/null +++ b/gpucachesim/stats/agg.py @@ -0,0 +1,192 @@ +import copy +import typing +import numpy as np +import pandas as pd + +import gpucachesim.benchmarks as benchmarks +from gpucachesim.benchmarks import ( + Target, +) + + +def aggregate_mean_input_config_stats( + df: pd.DataFrame, + per_kernel=True, + mean=True, 
+ inspect=False, +) -> typing.Tuple[pd.DataFrame, typing.List[str]]: + bench_input_cols = copy.deepcopy(list(benchmarks.ALL_BENCHMARK_INPUT_COLS)) + input_cols = copy.deepcopy(benchmarks.SIMULATE_INPUT_COLS) + input_config_group_cols = list(benchmarks.BENCH_TARGET_INDEX_COLS + input_cols + bench_input_cols + ["input_id"]) + input_config_group_cols = [col for col in input_config_group_cols if col in df] + + preview_cols = [ + "target", + "benchmark", + "input_id", + "run", + "kernel_launch_id", + "kernel_name", + "kernel_name_mangled", + ] + preview_cols += ["exec_time_sec"] + preview_cols += ["cycles"] + # print(df.loc[:,preview_cols][:]) + + if not per_kernel: + # sum metrics for all kernels per input_id and run + group_cols = input_config_group_cols + ["run"] + + # TODO: + # how do we deal with NOT summing cycles for kernel, while + # we want to sum the execution time + aggregations = { + **{c: "sum" for c in sorted(df.columns)}, + **{c: "mean" for c in benchmarks.RATE_COLUMNS}, + # **{c: "first" for c in bench_input_cols + input_cols}, + **benchmarks.NON_NUMERIC_COLS, + } + aggregations = {col: agg for col, agg in aggregations.items() if col in df and not col in group_cols} + + grouped = df.groupby(group_cols, dropna=False) + + def _inspect_per_config(df): + print("\nINSPECT: metrics (per input config, PER RUN)") + print(df.loc[:, preview_cols][:10]) + pass + + if inspect: + grouped[df.columns].apply(_inspect_per_config) + df = grouped.agg(aggregations).reset_index() + + # we no longer have kernels now + df["kernel_launch_id"] = np.nan + df["kernel_name"] = np.nan + df["kernel_name_mangled"] = np.nan + + # compute mean per input_id and kernel launch id over all runs + group_cols = input_config_group_cols + ["kernel_launch_id", "kernel_name"] + + if mean: + aggregations = { + **{c: "mean" for c in sorted(df.columns)}, + **{c: "first" for c in bench_input_cols + input_cols}, + **benchmarks.NON_NUMERIC_COLS, + } + aggregations = {col: agg for col, agg in 
aggregations.items() if col in df and not col in group_cols} + grouped = df.groupby(group_cols, dropna=False) + + def _inspect_per_config_per_kernel(df): + print("\nINSPECT: metrics (per input config, PER KERNEL)") + print(df.loc[:, preview_cols][:10]) + pass + + if inspect: + grouped[df.columns].apply(_inspect_per_config_per_kernel) + df = grouped.agg(aggregations).reset_index() + + return df.copy(), group_cols + + +class TargetDataframes(typing.NamedTuple): + native_df: pd.DataFrame + accelsim_df: pd.DataFrame + serial_gpucachesim_df: pd.DataFrame + serial_gpucachesim_mem_only_df: pd.DataFrame + serial_gpucachesim_exec_driven_df: pd.DataFrame + parallel_gpucachesim_df: pd.DataFrame + + +class FunctionalConfig(typing.TypedDict): + num_clusters: int + cores_per_cluster: int + + +def split_into_target_dfs( + df, + per_kernel=False, + mean=False, + functional_config: typing.Optional[FunctionalConfig] = None, +) -> TargetDataframes: + df = df.reset_index() + + baseline_cores_per_cluster = benchmarks.BASELINE["cores_per_cluster"] + baseline_num_clusters = benchmarks.BASELINE["num_clusters"] + functional_config = FunctionalConfig( + cores_per_cluster=baseline_cores_per_cluster, + num_clusters=baseline_num_clusters, + ) + + def _label(label, shape): + return "{:>50}\t{}".format(label, shape) + + # native + native_mask = df["target"] == Target.Profile.value + native_df = df[native_mask] + native_df, _ = aggregate_mean_input_config_stats(native_df, per_kernel=per_kernel, mean=mean) + print(_label("native", native_df.shape)) + + # accelsim + accelsim_mask = df["target"] == Target.AccelsimSimulate.value + accelsim_df = df[accelsim_mask] + accelsim_df, _ = aggregate_mean_input_config_stats(accelsim_df, per_kernel=per_kernel, mean=mean) + print(_label("accelsim", accelsim_df.shape)) + + # gpucachesim (serial) + serial_gpucachesim_mask = df["target"] == Target.Simulate.value + serial_gpucachesim_mask &= df["input_mode"].isin(["serial", np.nan]) + serial_gpucachesim_mask &= 
df["input_memory_only"] == False + if functional_config is not None: + serial_gpucachesim_mask &= df["input_cores_per_cluster"] == functional_config["cores_per_cluster"] + serial_gpucachesim_mask &= df["input_num_clusters"] == functional_config["num_clusters"] + serial_gpucachesim_df = df[serial_gpucachesim_mask] + serial_gpucachesim_df, _ = aggregate_mean_input_config_stats( + serial_gpucachesim_df, per_kernel=per_kernel, mean=mean + ) + print(_label("serial gpucachesim", serial_gpucachesim_df.shape)) + + # gpucachesim (serial, mem only) + serial_gpucachesim_mem_only_mask = df["target"] == Target.Simulate.value + serial_gpucachesim_mem_only_mask &= df["input_memory_only"] == True + serial_gpucachesim_mem_only_mask &= df["input_mode"].isin(["serial", np.nan]) + if functional_config is not None: + serial_gpucachesim_mem_only_mask &= df["input_cores_per_cluster"] == functional_config["cores_per_cluster"] + serial_gpucachesim_mem_only_mask &= df["input_num_clusters"] == functional_config["num_clusters"] + serial_gpucachesim_mem_only_df = df[serial_gpucachesim_mem_only_mask] + serial_gpucachesim_mem_only_df, _ = aggregate_mean_input_config_stats( + serial_gpucachesim_mem_only_df, per_kernel=per_kernel, mean=mean + ) + print(_label("serial gpucachesim (mem only)", serial_gpucachesim_mem_only_df.shape)) + + # gpucachesim (serial, exec-driven) + serial_gpucachesim_exec_driven_mask = df["target"] == Target.ExecDrivenSimulate.value + # print("mask num", sum(serial_gpucachesim_exec_driven_mask)) + # print(df.loc[serial_gpucachesim_exec_driven_mask, ["target", "input_memory_only", "input_mode"]]) + serial_gpucachesim_exec_driven_mask &= df["input_mode"].isin(["serial", "", np.nan]) + serial_gpucachesim_exec_driven_df = df[serial_gpucachesim_exec_driven_mask] + serial_gpucachesim_exec_driven_df, _ = aggregate_mean_input_config_stats( + serial_gpucachesim_exec_driven_df, per_kernel=per_kernel, mean=mean + ) + print(_label("serial gpucachesim (exec driven)", 
serial_gpucachesim_exec_driven_df.shape)) + + # gpucachesim (parallel) + parallel_gpucachesim_mask = df["target"] == Target.Simulate.value + parallel_gpucachesim_mask &= df["input_mode"] != "serial" + parallel_gpucachesim_mask &= df["input_memory_only"] == False + if functional_config is not None: + parallel_gpucachesim_mask &= df["input_cores_per_cluster"] == functional_config["cores_per_cluster"] + parallel_gpucachesim_mask &= df["input_num_clusters"] == functional_config["num_clusters"] + parallel_gpucachesim_df = df[parallel_gpucachesim_mask] + parallel_gpucachesim_df, _ = aggregate_mean_input_config_stats( + parallel_gpucachesim_df, per_kernel=per_kernel, mean=mean + ) + print(_label("parallel gpucachesim", parallel_gpucachesim_df.shape)) + + return TargetDataframes( + native_df=native_df, + accelsim_df=accelsim_df, + serial_gpucachesim_df=serial_gpucachesim_df, + serial_gpucachesim_mem_only_df=serial_gpucachesim_mem_only_df, + serial_gpucachesim_exec_driven_df=serial_gpucachesim_exec_driven_df, + parallel_gpucachesim_df=parallel_gpucachesim_df, + ) diff --git a/gpucachesim/stats/common.py b/gpucachesim/stats/common.py index ec337fb8..fa529ce9 100644 --- a/gpucachesim/stats/common.py +++ b/gpucachesim/stats/common.py @@ -5,12 +5,7 @@ import re import typing import os - - -BASELINE = dict( - cores_per_cluster=1, - num_clusters=28, -) +import gpucachesim.benchmarks as benchmarks def function_name_from_signature(sig: str) -> str: @@ -44,13 +39,15 @@ def __init__(self, result_df: pd.DataFrame) -> None: def cores_per_cluster(self): return int( self.bench_config["values"].get( - "cores_per_cluster", BASELINE["cores_per_cluster"] + "cores_per_cluster", benchmarks.BASELINE["cores_per_cluster"] ) ) def num_clusters(self): return int( - self.bench_config["values"].get("num_clusters", BASELINE["num_clusters"]) + self.bench_config["values"].get( + "num_clusters", benchmarks.BASELINE["num_clusters"] + ) ) def total_cores(self): diff --git a/gpucachesim/stats/load.py 
b/gpucachesim/stats/load.py new file mode 100644 index 00000000..baa05553 --- /dev/null +++ b/gpucachesim/stats/load.py @@ -0,0 +1,181 @@ +import pandas as pd +import copy +from wasabi import color +import gpucachesim.utils as utils +import gpucachesim.benchmarks as benchmarks + +from gpucachesim import REPO_ROOT_DIR +from gpucachesim.benchmarks import ( + Target, + Benchmarks, +) + + +def load_stats(bench_name, profiler="nvprof", path=None) -> pd.DataFrame: + stats = [] + if bench_name is not None: + stats_file = REPO_ROOT_DIR / "results/combined.stats.{}.{}.csv".format(profiler, bench_name) + print("loading {}".format(stats_file)) + df = pd.read_csv(stats_file, header=0) + if len(df) < 1: + print(color("WARNING: {} is empty!".format(stats_file), fg="red")) + else: + stats.append(df) + else: + b = Benchmarks(path) + benches = utils.flatten(list(b.benchmarks[Target.Profile.value].values())) + bench_names = set([b["name"] for b in benches]) + for name in bench_names: + stats_file = REPO_ROOT_DIR / "results/combined.stats.{}.{}.csv".format(profiler, name) + print("loading {}".format(stats_file)) + df = pd.read_csv(stats_file, header=0) + if len(df) < 1: + print(color("WARNING: {} is empty!".format(stats_file), fg="red")) + else: + stats.append(df) + + stats_df = pd.concat(stats, ignore_index=False) + stats_df = stats_df.sort_values(["benchmark", "target"]) + if bench_name is not None: + if isinstance(bench_name, str): + bench_names = [bench_name] + elif isinstance(bench_name, list): + bench_names = bench_name + else: + raise ValueError + stats_df = stats_df[stats_df["benchmark"].isin(bench_names)] + + # special_dtypes = { + # # **{col: "float64" for col in stats_df.columns}, + # # **{col: "object" for col in benchmarks.NON_NUMERIC_COLS.keys()}, + # "target": "str", + # "benchmark": "str", + # "Host Name": "str", + # "Process Name": "str", + # "device": "str", + # "context_id": "float", + # "is_release_build": "bool", + # "kernel_function_signature": "str", + # 
"kernel_name": "str", + # "kernel_name_mangled": "str", + # "input_id": "float", + # # "input_memory_only": "first", + # # "input_mode": "first", + # # makes no sense to aggregate + # "cores_per_cluster": "float", + # "num_clusters": "float", + # "total_cores": "float", + # "input_memory_only": "bool", + # "input_num_clusters": "float", + # "input_cores_per_cluster": "float", + # "input_mode": "str", + # "input_threads": "float", + # "input_run_ahead": "float", + # } + # missing_dtypes = set(benchmarks.NON_NUMERIC_COLS.keys()) - set(special_dtypes.keys()) + # assert len(missing_dtypes) == 0, "missing dtypes for {}".format(missing_dtypes) + + dtypes = { + **{col: "float64" for col in stats_df.columns}, + **benchmarks.SPECIAL_DTYPES, + } + # raise ValueError("test") + dtypes = {col: dtype for col, dtype in dtypes.items() if col in stats_df} + stats_df = stats_df.astype(dtypes) + + simulation_targets = [ + Target.Simulate.value, + Target.AccelsimSimulate.value, + Target.PlaygroundSimulate.value, + ] + simulation_targets_df = stats_df[stats_df["target"].isin(simulation_targets)] + if not (simulation_targets_df["is_release_build"] == True).all(): + print(color("WARNING: non release results:", fg="red")) + non_release_results = simulation_targets_df[simulation_targets_df["is_release_build"] == True] + grouped = non_release_results.groupby(["benchmark", "target"]) + print(grouped["input_id"].count()) + print("====") + + non_float_cols = set( + [col for col, dtype in benchmarks.SPECIAL_DTYPES.items() if dtype not in ["float", "float64", "int", "int64"]] + ) + nan_dtype = pd.NA + fill = { + **{col: 0.0 for col in stats_df.columns}, + **{col: nan_dtype for col in non_float_cols}, + **{ + "kernel_name_mangled": nan_dtype, + "kernel_name": nan_dtype, + "device": nan_dtype, + # test this out + "kernel_launch_id": nan_dtype, + "run": nan_dtype, + }, + **{c: nan_dtype for c in benchmarks.ALL_BENCHMARK_INPUT_COLS}, + **{c: nan_dtype for c in benchmarks.SIMULATE_INPUT_COLS}, + **{ 
+ "input_memory_only": False, + "input_num_clusters": 28, + "input_cores_per_cluster": 1, + }, + } + assert pd.isnull(fill["kernel_launch_id"]) + assert pd.isnull(fill["kernel_name"]) + # fill = { + # col: dtype for col, dtype in fill.items() + # if col not in benchmarks.CATEGORICAL_COLS + # } + + stats_df = stats_df.fillna(fill).infer_objects(copy=False) + assert stats_df["run"].isna().sum() == 0 + + def add_no_kernel_exec_time(df): + # print(df[benchmarks.PREVIEW_COLS][:4].T) + try: + before = copy.deepcopy(df.dtypes) + if df["target"].iloc[0] != Target.Simulate.value: + return df + + assert len(df) >= 2, "expected at least two rows: a no kernel row and at least one kernel for the config" + # print("df") + # print(df) + valid_kernels = ~df["kernel_name"].isna() + # print("valid_kernels") + # print(valid_kernels) + no_kernel = df[~valid_kernels] + # print("no kernel") + # print(no_kernel) + assert len(no_kernel) == 1 + num_valid_kernels = valid_kernels.sum() + assert num_valid_kernels >= 1 + delta = float(no_kernel["exec_time_sec"].iloc[0]) / num_valid_kernels + df.loc[valid_kernels, "exec_time_sec"] += delta + assert (df.dtypes == before).all() + return df + except Exception as e: + print(e) + return str(e) + + group_cols = list( + benchmarks.BENCH_TARGET_INDEX_COLS + + list(benchmarks.ALL_BENCHMARK_INPUT_COLS) + + benchmarks.SIMULATE_INPUT_COLS + + ["run"] + ) + print(len(stats_df)) + group_cols = [col for col in group_cols if col in stats_df] + # pprint(group_cols) + # pprint(stats_df[group_cols].dtypes) + # stats_df = stats_df.fillna({'target': "", "benchmark": "", "input_mode": ""}) + grouped = stats_df.groupby(group_cols, dropna=False) + # grouped = grouped[stats_df.columns].fillna({'target': "", "benchmark": "", "input_mode": ""}) + # print(grouped.isna()) + # raise ValueError("grouped") + stats_df = grouped[stats_df.columns].apply(add_no_kernel_exec_time) + stats_df = stats_df.reset_index(drop=True) + # raise ValueError("its over") + + assert 
def slowdown(baseline, values):
    """Return how many times larger ``values`` is than ``baseline``.

    Computed as ``values / baseline``; works element-wise for array-likes.
    """
    return values / baseline


def speedup(baseline, values):
    """Return how many times smaller ``values`` is than ``baseline``.

    Computed as ``baseline / values``; works element-wise for array-likes.
    """
    return baseline / values


def geo_mean(values) -> float:
    """Return the geometric mean of all elements in ``values``.

    The product is accumulated in float64 so that integer inputs cannot
    silently wrap around, which happened when the product was taken in the
    input's own integer dtype. An empty input yields NaN instead of raising
    ``ZeroDivisionError``.
    """
    a = np.asarray(values, dtype=float)
    if a.size == 0:
        return float("nan")
    return a.prod() ** (1.0 / a.size)


def bounded_relative_absolute_error(true_values, values, **kwargs):
    """Return ``|values| / (|values| + |true_values|)`` element-wise.

    Both arguments must offer the pandas ``fillna``/``abs`` interface.
    Missing entries are treated as 0.0, and positions where both inputs are
    zero (0/0) are reported as 0.0. ``kwargs`` is accepted and ignored.

    NOTE(review): as written, identical non-zero inputs yield 0.5 rather
    than 0.0; confirm this is the intended definition of the metric.

    Raises:
        AssertionError: if any (filled) input value is negative.
    """
    values = values.fillna(0.0)
    true_values = true_values.fillna(0.0)

    # we only deal with positive numbers
    assert np.all(values >= 0.0)
    assert np.all(true_values >= 0.0)

    brae = values.abs() / (values.abs() + true_values.abs())
    # 0 / 0 produces NaN; report it as "no error"
    return brae.fillna(0.0)


def rel_err(true_values, values, eps: typing.Optional[float] = None):
    """Return the element-wise relative error ``|values - true| / true``.

    Both arguments must offer the pandas ``fillna``/``abs`` interface.
    Missing entries are treated as 0.0 and positions where both inputs agree
    are reported as exactly 0.0 (this covers the 0/0 case). A non-zero value
    against a zero reference yields ``inf``.

    ``eps`` is accepted for backward compatibility but is currently unused.

    Raises:
        AssertionError: if any (filled) input value is negative.
    """
    values = values.fillna(0.0)
    true_values = true_values.fillna(0.0)
    exact = values == true_values

    # we only deal with positive numbers
    assert np.all(values >= 0.0)
    assert np.all(true_values >= 0.0)

    errors = (values - true_values).abs() / true_values
    errors = errors.fillna(0.0)
    errors[exact] = 0.0
    return errors


def rpd(true_values, values):
    """Placeholder for a relative percent difference metric.

    Not implemented yet: the inputs are only NaN-filled and the function
    returns ``None``.
    """
    values = values.fillna(0.0)
    true_values = true_values.fillna(0.0)
    return None


def mse(true_values, values) -> float:
    """Return the mean squared error (NaN entries count as 0.0)."""
    values = values.fillna(0.0)
    true_values = true_values.fillna(0.0)
    return sklearn.metrics.mean_squared_error(true_values, values)


def rmse_real(true_values, values) -> float:
    """Return the root mean squared error (NaN entries count as 0.0)."""
    values = values.fillna(0.0)
    true_values = true_values.fillna(0.0)
    return ((values - true_values) ** 2).mean() ** 0.5


def rmse(true_values, values) -> float:
    """Return the mean of ``(values - true) / (|values| + |true|)``.

    NOTE: despite its name this is *not* a root mean squared error; it is
    the mean signed difference scaled by the summed magnitudes. Use
    ``rmse_real`` for the conventional RMSE.
    """
    values = values.fillna(0.0)
    true_values = true_values.fillna(0.0)
    diff = values - true_values
    scale = values.abs() + true_values.abs()
    return (diff / scale).mean()


def abs_err(true_values, values):
    """Return the element-wise absolute error (NaN entries count as 0.0)."""
    values = values.fillna(0.0)
    true_values = true_values.fillna(0.0)
    return (true_values - values).abs()


def smape(true_values, values) -> float:
    """SMAPE (symmetric): mean of ``|v - t| / (|v| + |t|)``.

    Positions where both inputs agree contribute 0.0, which also covers the
    0/0 case.
    """
    values = values.fillna(0.0)
    true_values = true_values.fillna(0.0)

    errors = (values - true_values).abs() / (values.abs() + true_values.abs())
    errors[values == true_values] = 0.0
    return errors.mean()


def ermsle(true_values, values) -> float:
    """ERMSLE: Exponential root mean square log error.

    Computes ``exp(sqrt(mean(log(values / true_values) ** 2)))`` over the
    positions whose ratio is finite and non-zero. Positions where both
    inputs agree (including 0/0) count as a ratio of 1. Returns NaN when no
    position is usable.
    """
    values = values.fillna(0.0)
    true_values = true_values.fillna(0.0)
    ratios = values / true_values
    ratios[values == true_values] = 1.0

    # parentheses matter: `&` binds tighter than `!=`
    valid_mask = np.isfinite(ratios) & (ratios != 0)

    squared_logs = np.log(ratios[valid_mask]) ** 2
    return float(np.exp(np.sqrt(squared_logs.mean())))


def emale(true_values, values) -> float:
    """EMALE: Exponential mean absolute log error.

    Computes ``exp(mean(|log(values / true_values)|))`` over the positions
    whose ratio is finite and non-zero. Positions where both inputs agree
    (including 0/0) count as a ratio of 1. Returns NaN when no position is
    usable.
    """
    values = values.fillna(0.0)
    true_values = true_values.fillna(0.0)
    ratios = values / true_values
    ratios[values == true_values] = 1.0

    # parentheses matter: `&` binds tighter than `!=`
    valid_mask = np.isfinite(ratios) & (ratios != 0)

    abs_logs = np.abs(np.log(ratios[valid_mask]))
    return float(np.exp(abs_logs.mean()))


def mape(true_values, values) -> float:
    """Return the mean absolute percentage error (NaN entries count as 0.0)."""
    values = values.fillna(0.0)
    true_values = true_values.fillna(0.0)
    return sklearn.metrics.mean_absolute_percentage_error(true_values, values)


def correlation(true_values, values, atol=None) -> float:
    """Return the Pearson correlation between ``true_values`` and ``values``.

    NaN entries count as 0.0. When either input has zero standard deviation
    the coefficient is undefined; in that case 1.0 is returned if ``atol``
    is given and the inputs agree element-wise within that tolerance,
    otherwise NaN.

    Raises:
        AssertionError: if an input contains infinite values, or if the
            inputs differ in length on the zero-deviation path.
    """
    values = values.fillna(0.0)
    true_values = true_values.fillna(0.0)
    assert np.all(np.isfinite(values))
    assert np.all(np.isfinite(true_values))

    # shifting both inputs by a constant would not change the std dev,
    # so a constant input cannot be "fixed up" that way
    if values.std() != 0 and true_values.std() != 0:
        return np.corrcoef(true_values, values)[0][1]

    assert len(values) == len(true_values)
    if atol is not None:
        lower = np.amin([values, true_values], axis=0)
        upper = np.amax([values, true_values], axis=0)
        if np.allclose(lower, upper, atol=atol):
            return 1.0
    return np.nan
a/gpucachesim/stats/native.py b/gpucachesim/stats/native.py index 9e8baad0..b08cc973 100644 --- a/gpucachesim/stats/native.py +++ b/gpucachesim/stats/native.py @@ -694,9 +694,9 @@ def compute_native_result_df_nvprof(self): self.result_df["kernel_name"] = self.result_df[ "kernel_function_signature" ].apply( - lambda sig: np.nan - if pd.isnull(sig) - else common.function_name_from_signature(sig) + lambda sig: ( + np.nan if pd.isnull(sig) else common.function_name_from_signature(sig) + ) ) # map sorted correlation ids to increasing launch ids diff --git a/gpucachesim/stats/parallel_table.py b/gpucachesim/stats/parallel_table.py new file mode 100644 index 00000000..0ceee3b1 --- /dev/null +++ b/gpucachesim/stats/parallel_table.py @@ -0,0 +1,1709 @@ +import typing +import copy + +import numpy as np +import pandas as pd +from wasabi import color +from pprint import pprint + +import gpucachesim.benchmarks as benchmarks +import gpucachesim.utils as utils +import gpucachesim.plot as plot +import gpucachesim.stats.metrics as metrics +import gpucachesim.stats.agg + +from gpucachesim.benchmarks import ( + Target, +) + +class ParallelTableRow(typing.NamedTuple): + metric: str + threads: int + serial_value: typing.Optional[typing.Tuple[float, typing.Union[float, int, str]]] + det_value: typing.Optional[typing.Tuple[float, typing.Union[float, int, str]]] + nondet_values: typing.Sequence[typing.Tuple[float, typing.Union[float, int, str]]] + + def values(self): + values = [] + if self.serial_value is not None: + values.append(self.serial_value[0]) + if self.det_value is not None: + values.append(self.det_value[0]) + values += [v[0] for v in self.nondet_values] + return values + + +def build_parallel_table_rows( + df: pd.DataFrame, + # num_bench_configs: int, + thousands_round_to=1, + variable_precision=True, + verbose=True, +) -> typing.Sequence[ParallelTableRow]: + # interleave_n = list(itertools.product([False, True], [5, 10])) + run_ahead_values = [5, 10] + for run_ahead in 
run_ahead_values: + # print(df["input_run_ahead_parallel"].unique()) + assert run_ahead in df["input_run_ahead_parallel"].unique() + + table_rows: typing.Sequence[ParallelTableRow] = [] + + + multiple_bench_configs = len(df[["target", "benchmark", "input_id_serial"]].drop_duplicates()) > 1 + + # assert num_bench_configs > 0 + # multiple_bench_configs = num_bench_configs > 1 + + for threads in [4, 8]: + threads_mask = df["input_threads_parallel"] == threads + det_mask = df["input_mode_parallel"] == "deterministic" + nondet_mask = df["input_mode_parallel"] == "nondeterministic" + + preview_cols = ( + benchmarks.BENCH_TARGET_INDEX_COLS + + ["kernel_name", "kernel_launch_id", "run"] + + list(copy.deepcopy(benchmarks.ALL_BENCHMARK_INPUT_COLS)) + + benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS + + [col + "_parallel" for col in benchmarks.SIMULATE_EXECUTION_CONFIG_COLS] + + [ + "exec_time_sec_parallel", + "input_id_parallel", + "input_id_serial", + "cycles_serial", + "cycles_parallel", + "cycles_mape", + # "dram_reads_serial", + # "dram_reads_parallel", + # "dram_reads_rel_err", + # "dram_writes_serial", + # "dram_writes_parallel", + # "dram_writes_rel_mape", + ] + # + different_cols(det) + ) + preview_cols = [col for col in preview_cols if col in df] + + all_parallel = df[(nondet_mask | det_mask) & threads_mask] + + # diff = set(preview_cols) - set(list(all_parallel.columns)) + # print(diff) + + # benchmarks.BENCH_TARGET_INDEX_COLS + # + ["kernel_name", "kernel_launch_id", "run"] + # + list(copy.deepcopy(benchmarks.ALL_BENCHMARK_INPUT_COLS)) + # + benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS + + if verbose: + print( + color( + "==> max speedup for {} threads is {}".format( + threads, all_parallel["exec_time_sec_speedup"].max() + ), + fg="green", + ) + ) + + weird_mask = all_parallel["exec_time_sec_speedup"] > threads + weird = all_parallel.loc[weird_mask, preview_cols] + if len(weird) > 0: + print( + color( + "WARNING: weird results for {} threads:".format(threads), fg="red" 
+ ) + ) + print(weird.T) + print("===") + # assert len(weird) == 0 + + # nondet_no_interleave_mask = df["input_mode_parallel"] == "nondeterministic" + # nondet_interleave_mask = ( + # df["input_mode_parallel"] == "nondeterministic_interleave" + # ) + # print([m.sum() for m in [ + # mask, threads_mask, det_mask, nondet_no_interleave_mask, nondet_interleave_mask + # ]]) + + det = df[threads_mask & det_mask] + # if False: + # if num_bench_configs > 1: + # print(det.loc[det["benchmark"] == "vectorAdd", preview_cols].T) + # else: + # print(det.loc[:, preview_cols].T) + + all_nondet = df[threads_mask & nondet_mask] + # nondet_no_interleave = df[threads_mask & nondet_no_interleave_mask] + # nondet_interleave = df[threads_mask & nondet_interleave_mask] + + if verbose: + print( + "num deterministic={} num nondeterministic={}".format( + # num benchmark configs={}".format( + len(det), len(all_nondet), # num_bench_configs + ) + ) + + # print(det) + # if not large: + + # assert len(det) == num_bench_configs + # assert len(all_nondet) == len(run_ahead_values) * num_bench_configs + + # assert len(nondet_no_interleave) == 2 * num_bench_configs + # assert len(nondet_interleave) == 2 * num_bench_configs + # assert ( + # len( + # df[[ + # "exec_time_sec_serial", + # "cycles_serial", + # "input_id_serial", + # ]].drop_duplicates() + # ) + # == 1 + # ) + + parallel_preview_cols = list( + benchmarks.BENCH_TARGET_INDEX_COLS + + ["input_id_serial", "input_id_parallel"] + + benchmarks.INDEX_COLS + + [c for c in benchmarks.SIMULATE_INPUT_COLS] + + [c + "_parallel" for c in benchmarks.SIMULATE_INPUT_COLS] + + list(benchmarks.ALL_BENCHMARK_INPUT_COLS) + ) + parallel_preview_cols += [ + "total_cores_parallel", + "num_blocks_parallel", + "mean_blocks_per_sm_parallel", + "exec_time_sec_serial", + "exec_time_sec_parallel", + "exec_time_sec_speedup", + "cycles_serial", + "cycles_parallel", + "cycles_mape", + ] + parallel_preview_cols = [col for col in parallel_preview_cols if col in df] + + spacer 
= " " + ("=" * 20) + " " + + # exec time (speedup) + serial_exec_time = df.loc[threads_mask, "exec_time_sec_serial"].mean() + det_exec_time = det["exec_time_sec_parallel"].mean() + det_speedup = det["exec_time_sec_speedup"].mean() + # if multiple_bench_configs: + + if verbose: + print("") + print( + spacer + + "DETERMINISTIC {} threads={}".format(det.shape, threads) + + spacer + ) + print(det[parallel_preview_cols][:8].T) + + # make sure we aggregate a single functional config only + assert det["input_cores_per_cluster"].nunique() == 1 + assert det["input_num_clusters"].nunique() == 1 + assert det["input_memory_only"].nunique() == 1 + + nondet_values = [] + # for interleave, n in interleave_n: + for run_ahead in run_ahead_values: + # nondet = nondet_interleave if interleave else nondet_no_interleave + # print("run ahead={}".format(run_ahead)) + nondet = all_nondet[all_nondet["input_run_ahead_parallel"] == run_ahead] + + if verbose: + print("") + print( + spacer + + "NONDETERMINISTIC {} threads={} run ahead={}".format( + nondet.shape, threads, run_ahead + ) + + spacer + ) + print(nondet[parallel_preview_cols][:8].T) + + # print(nondet.T) + # assert len(nondet) == 1 + # if not large: + # assert len(nondet) == num_bench_configs + + nondet_exec_time = nondet["exec_time_sec_parallel"].mean() + nondet_speedup = nondet["exec_time_sec_speedup"].mean() + if multiple_bench_configs: + nondet_values.append( + ( + nondet_speedup, + "${}x$".format( + plot.round_to_precision_str( + nondet_speedup, + round_to=1, + variable_precision=variable_precision, + ) + ), + ) + ) + + else: + nondet_values.append( + ( + nondet_exec_time, + "${:>3.1f}s~({}x)$".format( + nondet_exec_time, + plot.round_to_precision_str( + nondet_speedup, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + ) + + serial_value = ( + None + if multiple_bench_configs + else (serial_exec_time, "${:>3.1f}s$".format(serial_exec_time)) + ) + if multiple_bench_configs: + det_value = ( + det_speedup, + 
"${}x$".format( + plot.round_to_precision_str( + det_speedup, round_to=1, variable_precision=variable_precision + ) + ), + ) + else: + det_value = ( + det_exec_time, + "${:>3.1f}s~({}x)$".format( + det_exec_time, + plot.round_to_precision_str( + det_speedup, round_to=1, variable_precision=variable_precision + ), + ), + ) + table_rows.append( + ParallelTableRow( + metric=r"exec\\time", + threads=threads, + serial_value=serial_value, + det_value=det_value, + nondet_values=nondet_values, + ) + ) + + # cycles (rel err) + serial_cycles = int(df.loc[threads_mask, "cycles_serial"].mean()) + det_cycles = int(det["cycles_parallel"].mean()) + det_rel_err = det["cycles_mape"].mean() + nondet_values = [] + # for interleave, n in interleave_n: + for run_ahead in run_ahead_values: + # nondet = nondet_interleave if interleave else nondet_no_interleave + nondet = all_nondet[all_nondet["input_run_ahead_parallel"] == run_ahead] + # assert len(nondet) == num_bench_configs + + nondet_cycles = int(nondet["cycles_parallel"].mean()) + nondet_rel_err = nondet["cycles_mape"].mean() + if multiple_bench_configs: + nondet_values.append( + ( + nondet_rel_err, + "${}\\%$".format( + plot.round_to_precision_str( + 100.0 * nondet_rel_err, + round_to=1, + variable_precision=variable_precision, + ) + ), + ) + ) + else: + nondet_values.append( + ( + nondet_cycles, + "${} ({}\\%)$".format( + plot.human_format_thousands( + nondet_cycles, + round_to=thousands_round_to, + variable_precision=variable_precision, + ), + plot.round_to_precision_str( + 100.0 * nondet_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + ) + + serial_value = ( + None + if multiple_bench_configs + else ( + serial_cycles, + "${}$".format( + plot.human_format_thousands( + serial_cycles, + round_to=thousands_round_to, + variable_precision=variable_precision, + ) + ), + ) + ) + if multiple_bench_configs: + det_value = ( + 100.0 * det_rel_err, + "${}\\%$".format( + plot.round_to_precision_str( + 100.0 * 
det_rel_err, + round_to=1, + variable_precision=variable_precision, + ) + ), + ) + else: + det_value = ( + det_cycles, + "${} ({}\\%)$".format( + plot.human_format_thousands( + det_cycles, + round_to=thousands_round_to, + variable_precision=variable_precision, + ), + plot.round_to_precision_str( + 100.0 * det_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + table_rows.append( + ParallelTableRow( + metric="cycles", + threads=threads, + serial_value=serial_value, + det_value=det_value, + nondet_values=nondet_values, + ) + ) + + # l1 data hit rate (rel err) + serial_l1_hit_rate = df.loc[threads_mask, "l1_hit_rate_serial"].mean() + det_l1_hit_rate = det["l1_hit_rate_parallel"].mean() + det_rel_err = det["l1_hit_rate_mae"].mean() + nondet_values = [] + # for interleave, n in interleave_n: + for run_ahead in run_ahead_values: + # nondet = nondet_interleave if interleave else nondet_no_interleave + nondet = all_nondet[all_nondet["input_run_ahead_parallel"] == run_ahead] + # assert len(nondet) == 1 + # assert len(nondet) == num_bench_configs + + nondet_l1_hit_rate = nondet["l1_hit_rate_parallel"].mean() + nondet_rel_err = nondet["l1_hit_rate_mae"].mean() + if multiple_bench_configs: + nondet_values.append( + ( + 100.0 * nondet_rel_err, + "${}\\%$".format( + plot.round_to_precision_str( + 100.0 * nondet_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + ) + else: + nondet_values.append( + ( + 100.0 * nondet_l1_hit_rate, + "${}\\%~({}\\%)$".format( + plot.round_to_precision_str( + 100.0 * nondet_l1_hit_rate, + round_to=1, + variable_precision=variable_precision, + ), + plot.round_to_precision_str( + 100.0 * nondet_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + ) + + serial_value = ( + None + if multiple_bench_configs + else ( + 100.0 * serial_l1_hit_rate, + "${:>2.1f}\\%$".format(100.0 * serial_l1_hit_rate), + ) + ) + if multiple_bench_configs: + det_value = ( + 100.0 * det_rel_err, 
+ "${}\\%$".format( + plot.round_to_precision_str( + 100.0 * det_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + else: + det_value = ( + 100.0 * det_l1_hit_rate, + "${}\\%~({}\\%)$".format( + plot.round_to_precision_str( + 100.0 * det_l1_hit_rate, + round_to=1, + variable_precision=variable_precision, + ), + plot.round_to_precision_str( + 100.0 * det_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + + table_rows.append( + ParallelTableRow( + metric=r"L1D\\hit rate", + threads=threads, + serial_value=serial_value, + det_value=det_value, + nondet_values=nondet_values, + ) + ) + + # l2 data hit rate (rel err) + serial_l2_hit_rate = df.loc[threads_mask, "l2_hit_rate_serial"].mean() + det_l2_hit_rate = det["l2_hit_rate_parallel"].mean() + det_rel_err = det["l2_hit_rate_mae"].mean() + nondet_values = [] + # for interleave, n in interleave_n: + for run_ahead in run_ahead_values: + # nondet = nondet_interleave if interleave else nondet_no_interleave + nondet = all_nondet[all_nondet["input_run_ahead_parallel"] == run_ahead] + # assert len(nondet) == 1 + # assert len(nondet) == num_bench_configs + + nondet_l2_hit_rate = nondet["l2_hit_rate_parallel"].mean() + nondet_rel_err = nondet["l2_hit_rate_mae"].mean() + if multiple_bench_configs: + nondet_values.append( + ( + 100.0 * nondet_rel_err, + "${}\\%$".format( + plot.round_to_precision_str( + 100.0 * nondet_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + ) + else: + nondet_values.append( + ( + 100.0 * nondet_l2_hit_rate, + "${}\\%~({}\\%)$".format( + plot.round_to_precision_str( + 100.0 * nondet_l2_hit_rate, + round_to=1, + variable_precision=variable_precision, + ), + plot.round_to_precision_str( + 100.0 * nondet_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + ) + + serial_value = ( + None + if multiple_bench_configs + else ( + 100.0 * serial_l2_hit_rate, + "${}\\%$".format( + 
plot.round_to_precision_str( + 100.0 * serial_l2_hit_rate, + round_to=1, + variable_precision=variable_precision, + ) + ), + ) + ) + if multiple_bench_configs: + det_value = ( + 100.0 * det_rel_err, + "${}\\%$".format( + plot.round_to_precision_str( + 100.0 * det_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + else: + det_value = ( + 100.0 * det_l2_hit_rate, + "${}\\%~({}\\%)$".format( + plot.round_to_precision_str( + 100.0 * det_l2_hit_rate, + round_to=1, + variable_precision=variable_precision, + ), + plot.round_to_precision_str( + 100.0 * det_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + table_rows.append( + ParallelTableRow( + metric=r"L2D\\hit rate", + threads=threads, + serial_value=serial_value, + det_value=det_value, + nondet_values=nondet_values, + ) + ) + + # dram reads (rel err) + serial_dram_reads = int(df.loc[threads_mask, "dram_reads_serial"].mean()) + det_dram_reads = int(det["dram_reads_parallel"].mean()) + det_rel_err = det["dram_reads_smape"].mean() + nondet_values = [] + # for interleave, n in interleave_n: + for run_ahead in run_ahead_values: + # nondet = nondet_interleave if interleave else nondet_no_interleave + nondet = all_nondet[all_nondet["input_run_ahead_parallel"] == run_ahead] + # assert len(nondet) == 1 + # assert len(nondet) == num_bench_configs + + nondet_dram_reads = int(nondet["dram_reads_parallel"].mean()) + nondet_rel_err = nondet["dram_reads_smape"].mean() + if multiple_bench_configs: + nondet_values.append( + ( + nondet_rel_err, + "${}\\%$".format( + plot.round_to_precision_str( + 100.0 * nondet_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + ) + else: + nondet_values.append( + ( + nondet_dram_reads, + "${} ({}\\%)$".format( + plot.human_format_thousands( + nondet_dram_reads, + round_to=thousands_round_to, + variable_precision=variable_precision, + ), + plot.round_to_precision_str( + 100.0 * nondet_rel_err, + round_to=1, + 
variable_precision=variable_precision, + ), + ), + ) + ) + + serial_value = ( + None + if multiple_bench_configs + else ( + serial_dram_reads, + "${}$".format( + plot.human_format_thousands( + serial_dram_reads, + round_to=thousands_round_to, + variable_precision=variable_precision, + ) + ), + ) + ) + if multiple_bench_configs: + det_value = ( + 100.0 * det_rel_err, + "${}\\%$".format( + plot.round_to_precision_str( + 100.0 * det_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + else: + det_value = ( + det_dram_reads, + "${} ({}\\%)$".format( + plot.human_format_thousands( + det_dram_reads, + round_to=thousands_round_to, + variable_precision=variable_precision, + ), + plot.round_to_precision_str( + 100.0 * det_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + + table_rows.append( + ParallelTableRow( + metric=r"DRAM\\reads", + threads=threads, + serial_value=serial_value, + det_value=det_value, + nondet_values=nondet_values, + ) + ) + + # dram writes (rel err) + serial_dram_writes = int(df.loc[threads_mask, "dram_writes_serial"].mean()) + det_dram_writes = int(det["dram_writes_parallel"].mean()) + det_rel_err = det["dram_writes_smape"].mean() + nondet_values = [] + # for interleave, n in interleave_n: + for run_ahead in run_ahead_values: + # nondet = nondet_interleave if interleave else nondet_no_interleave + nondet = all_nondet[all_nondet["input_run_ahead_parallel"] == run_ahead] + # assert len(nondet) == 1 + # assert len(nondet) == num_bench_configs + + nondet_dram_writes = int(nondet["dram_writes_parallel"].mean()) + nondet_rel_err = nondet["dram_writes_smape"].mean() + if multiple_bench_configs: + nondet_values.append( + ( + 100.0 * nondet_rel_err, + "${}\\%$".format( + plot.round_to_precision_str( + 100.0 * nondet_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + ) + else: + nondet_values.append( + ( + nondet_dram_writes, + "${} ({}\\%)$".format( + 
plot.human_format_thousands( + nondet_dram_writes, + round_to=thousands_round_to, + variable_precision=variable_precision, + ), + plot.round_to_precision_str( + 100.0 * nondet_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + ) + + serial_value = ( + None + if multiple_bench_configs + else ( + serial_dram_writes, + "${}$".format( + plot.human_format_thousands( + serial_dram_writes, + round_to=thousands_round_to, + variable_precision=variable_precision, + ) + ), + ) + ) + if multiple_bench_configs: + det_value = ( + 100.0 * det_rel_err, + "${}\\%$".format( + plot.round_to_precision_str( + 100.0 * det_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + else: + det_value = ( + det_dram_writes, + "${} ({}\\%)$".format( + plot.human_format_thousands( + det_dram_writes, + round_to=thousands_round_to, + variable_precision=variable_precision, + ), + plot.round_to_precision_str( + 100.0 * det_rel_err, + round_to=1, + variable_precision=variable_precision, + ), + ), + ) + table_rows.append( + ParallelTableRow( + metric=r"DRAM\\writes", + threads=threads, + serial_value=serial_value, + det_value=det_value, + nondet_values=nondet_values, + ) + ) + return table_rows + +def compute_table_row_label(bench_config, df): + benchmark = df["benchmark"] + bench_input_cols = copy.deepcopy(benchmarks.BENCHMARK_INPUT_COLS[benchmark]) + assert all([c in df for c in bench_input_cols]) + + assert ( + df[["total_cores_parallel"]].values == df[["total_cores_serial"]].values + ).all() + + assert len(df[["input_cores_per_cluster"]].value_counts()) == 1 + assert len(df[["input_num_clusters"]].value_counts()) == 1 + assert len(df[["total_cores_parallel"]].value_counts()) == 1 + + cores_per_cluster = int(df[["input_cores_per_cluster"]].values[0]) + num_clusters = int(df[["input_num_clusters"]].values[0]) + total_cores = num_clusters * cores_per_cluster + + assert bench_config["inputs"]["input_cores_per_cluster"] == cores_per_cluster + assert 
bench_config["inputs"]["input_num_clusters"] == num_clusters + # print( + # df[ + # [ + # "benchmark", + # "input_cores_per_cluster", + # "input_num_clusters", + # "total_cores_parallel", + # ] + # ] + # ) + assert total_cores == int(df[["total_cores_parallel"]].values[0]) + + match benchmark.lower(): + case "vectoradd": + label = "VectorAdd (f{:<2}, {})".format( + int(df["input_dtype"]), + int(df["input_length"]), + ) + case "matrixmul": + label = "MatrixMul (f{:<2}, {}x{}x{})".format( + int(df["input_dtype"]), + int(df["input_rows"]), + int(df["input_rows"]), + int(df["input_rows"]), + ) + case "simple_matrixmul": + label = "Naive MatrixMul (f{:<2}, {}x{}x{})".format( + int(df["input_dtype"]), + int(df["input_m"]), + int(df["input_n"]), + int(df["input_p"]), + ) + case "transpose": + label = "Transpose ({}, {}x{})".format( + df["input_variant"], + int(df["input_dim"]), + int(df["input_dim"]), + ) + case "babelstream": + label = "BabelStream ({})".format(int(df["input_size"])) + case other: + label = str(other) + + label += " @ {} SM's [{:.2f} blocks/SM]".format( + int(df["total_cores_parallel"]), + float(df["mean_blocks_per_sm_parallel"]), + ) + return label + +def write_table_row(row: ParallelTableRow, _bold_values: typing.Optional[typing.Sequence[float]]=None): + if _bold_values is None: + bold_values = set() + else: + bold_values = set(_bold_values) + + def bold(v, formatted_v): + if v in bold_values: + formatted_v = formatted_v.strip() + is_math = formatted_v[0] == "$" and formatted_v[-1] == "$" + if is_math: + return r"\boldmath" + str(formatted_v) + else: + return r"\textbf{" + str(formatted_v) + "}" + return str(formatted_v) + + is_first_metric_row = row.threads == 4 + is_last_metric_row = row.threads == 8 + + table_row = "" + + # metric name + if is_first_metric_row: + table_row += r"\multirow{2}{*}{\shortstack[r]{" + str(row.metric) + r"}}" + + # threads + table_row += r" & $t=" + str(row.threads) + r"$ " + + # serial value + if row.serial_value is not 
None and is_first_metric_row: + table_row += ( + r" & \multirow{2}{*}{\shortstack[r]{" + + bold(row.serial_value[0], row.serial_value[1]) + + r"}} " + ) + else: + table_row += r" & " + + # deterministic value + if row.det_value is not None: + table_row += r" & " + bold(row.det_value[0], row.det_value[1]) + else: + table_row += r" & " + + # nondeterministic value + for nondet_value, formatted_nondet_value in row.nondet_values: + table_row += r" & " + bold(nondet_value, formatted_nondet_value) + table_row += r" \\ " + if is_last_metric_row: + table_row += r" \hline " + table_row += "\n" + return table_row + + + + +def parallel_table(selected_df, bench_name, scale_clusters=True, large=False, verbose=True, batch=False, png=False): + all_benchmarks = bench_name is None + + if verbose: + print(selected_df[["target", "run"]].drop_duplicates()) + + # only keep simulation and remove non kernel stats + selected_df = selected_df[selected_df["target"] == Target.Simulate.value] + selected_df = selected_df[~selected_df["kernel_name"].isna()] + # selected_df = sum_per_config_kernel_metrics(selected_df) + selected_df, _ = gpucachesim.stats.agg.aggregate_mean_input_config_stats( + selected_df, per_kernel=False, mean=False + ) + + # num_benchmarks = len(selected_df["benchmark"].unique().tolist()) + + all_input_cols = copy.deepcopy(benchmarks.ALL_BENCHMARK_INPUT_COLS) + all_input_cols = sorted(list([col for col in all_input_cols if col in selected_df])) + + # bench_cols = copy.deepcopy(benchmarks.BENCH_TARGET_INDEX_COLS) + bench_input_cols = ( + [] + if all_benchmarks + else copy.deepcopy(benchmarks.BENCHMARK_INPUT_COLS[bench_name]) + ) + # bench_input_cols = ( + # list(copy.deepcopy(benchmarks.ALL_BENCHMARK_INPUT_COLS) - set(["input_mode"])) + # if all_benchmarks else copy.deepcopy(benchmarks.BENCHMARK_INPUT_COLS[bench_name_arg]) + # ) + + # get serial + serial = selected_df[selected_df["input_mode"] == "serial"].copy() + + metric_cols = set(serial.columns) + metric_cols -= set([c 
for c in serial.columns if c.startswith("input_")]) + metric_cols -= set(benchmarks.NON_NUMERIC_COLS) + metric_cols -= set(["exec_time_sec", "run"]) + metric_cols = list(metric_cols) + metric_cols = sorted(metric_cols) + # pprint(metric_cols) + # print(serial.loc[ + # serial["input_id"] == 0, + # # ["cycles", "kernel_launch_id", "stream_id", "run"], + # ["target", "benchmark", "input_id", "kernel_name_mangled", "kernel_name", "run"] + # + metric_cols, + # ].T) + + deterministic_group_cols = [ + "target", + "benchmark", + "input_id", + "kernel_launch_id", + "kernel_name_mangled", + "kernel_name", + ] + metric_cols = [col for col in metric_cols if col not in deterministic_group_cols] + + def _inspect_deterministic_metrics(df): + print(df[metric_cols].nunique().T) + pass + + # print(serial.groupby(deterministic_group_cols, dropna=False)[metric_cols].apply(lambda df: print(df.T))) + # print(deterministic_group_cols) + # print(metric_cols) + serial_deterministic_grouped = serial.groupby( + deterministic_group_cols, dropna=False + ) + # serial_deterministic_grouped[serial.columns].apply(_inspect_deterministic_metrics) + unique_simulation_metrics = serial_deterministic_grouped[metric_cols].nunique() + assert (unique_simulation_metrics == 1).all(axis=1).all() + + # parallel + parallel = selected_df[~selected_df["input_mode"].isin([np.nan, "serial"])] + assert "total_cores" in serial + assert "total_cores" in parallel + + if verbose: + print("serial size", serial.shape) + print("parallel size", parallel.shape) + + # those are fully distinct + serial_input_ids = sorted(serial["input_id"].unique().tolist()) + parallel_input_ids = sorted(parallel["input_id"].unique().tolist()) + + if verbose: + print("{:>3} serial input ids".format(len(serial_input_ids), serial_input_ids)) + print("{:>3} parallel input ids".format(len(parallel_input_ids), parallel_input_ids)) + + if len(serial_input_ids) == 0: + raise ValueError("have zero serial benchmark configurations") + if 
len(parallel_input_ids) == 0: + raise ValueError("have zero parallel benchmark configurations") + + + deterministic = parallel[parallel["input_mode"] == "deterministic"] + assert len(deterministic) > 0 + unique_simulation_metrics = deterministic.groupby( + deterministic_group_cols, + dropna=False, + )[metric_cols].nunique() + + config_with_identical_results = (unique_simulation_metrics == 1).all(axis=1) + if not config_with_identical_results.all(): + bad_configs = unique_simulation_metrics[ + ~config_with_identical_results + ].reset_index() + # print(bad_configs.T) + bad = deterministic.merge( + bad_configs, + on=deterministic_group_cols, + how="inner", + suffixes=("", "_nunique"), + ) + # print(bad.T) + print(bad[deterministic_group_cols + ["run"] + metric_cols].T) + + assert ( + config_with_identical_results.all() + ), "deterministic configuration results differ for different runs, which makes them rather nondeterministic" + + # non deterministic without interleaving is also deterministic actually + nondeterministic = parallel[parallel["input_mode"] == "nondeterministic"] + # unique_simulation_metrics = nondeterministic.groupby( + # deterministic_group_cols, dropna=False + # )[metric_cols].nunique() + assert len(nondeterministic) > 0 + + input_id_partitoning = set(serial["input_id"].unique()).intersection( + set(parallel["input_id"].unique()) + ) + if len(input_id_partitoning) > 0: + print(color("serial and parallel input ids intersect ", fg="red")) + for input_id in input_id_partitoning: + input_preview_cols = list( + ["input_id"] + + benchmarks.BENCH_TARGET_INDEX_COLS + + ["kernel_launch_id"] + + bench_input_cols + + benchmarks.SIMULATE_INPUT_COLS + ) + + print("serial with input id", input_id) + print(serial.loc[serial["input_id"] == input_id, input_preview_cols]) + print("parallel input", input_id) + print(parallel.loc[parallel["input_id"] == input_id, input_preview_cols]) + break + assert ( + len(input_id_partitoning) == 0 + ), "serial and parallel inputs 
intersect, this is generally solved by regenerating the aggregated csv stats" + + # join based on input_cols, NOT based on mode + join_cols = list( + benchmarks.BENCH_TARGET_INDEX_COLS + + ["kernel_name", "kernel_launch_id", "run"] + + ( + list( + copy.deepcopy(benchmarks.ALL_BENCHMARK_INPUT_COLS) - set(["input_mode"]) + ) + if all_benchmarks + else copy.deepcopy(benchmarks.BENCHMARK_INPUT_COLS[bench_name]) + ) + + benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS + ) + if verbose: + print("JOIN COLS:") + pprint(join_cols) + + pre_join_preview_cols = ["benchmark", "kernel_name", "kernel_launch_id", "run"] + serial_indices = serial[pre_join_preview_cols].drop_duplicates(ignore_index=True) + parallel_indices = parallel[pre_join_preview_cols].drop_duplicates( + ignore_index=True + ) + # print(serial_indices) + # print(parallel_indices) + diff = parallel_indices.compare(serial_indices) + if len(diff) != 0: + print("DIFF START") + print(diff) + print("DIFF END") + assert len(diff) == 0 + + joined = parallel.merge( + serial, + on=join_cols, + how="left", + suffixes=("_parallel", "_serial"), + ) + if verbose: + print( + "joined={} parallel={} serial={}".format( + joined.shape, parallel.shape, serial.shape + ) + ) + + # test_df = joined + # test_df = serial + # test = test_df["target"] == Target.Simulate.value + # test &= test_df["benchmark"] == "vectorAdd" + # test &= test_df["input_id"] == 1 + # # test &= joined["kernel_name"] == "vecAdd" + # # test &= joined["kernel_launch_id"] == 0 + # # test &= joined["run"] == 1 + # # test &= joined["input_memory_only"] == False + # # test &= joined["input_num_clusters"] == 56 + # # test &= joined["input_cores_per_cluster"] == 1 + # pprint(list(test_df.columns.tolist())) + # print(test_df.loc[test, join_cols]) + + if verbose: + print( + "post join serial input ids", + sorted(joined["input_id_serial"].unique().tolist()), + ) + + assert joined.shape[0] == parallel.shape[0] + assert "mean_blocks_per_sm_parallel" in joined + assert 
"total_cores_parallel" in joined + assert "cores_per_cluster_parallel" in joined + + # this does no longer hold, since for parallel we currently do not run + # memory only, so there are some serial input ids that cannot be compared + # to parallel input ids. + # assert set(joined["input_id_serial"].values) == set(serial["input_id"].values) + + if len(joined) == 0: + raise ValueError("joined parallel and serial dataframe is empty") + + if large: + joined = joined[joined["mean_blocks_per_sm_parallel"] > 1.0] + + preview_metric_cols = ["cycles", "exec_time_sec", "l2_hit_rate", "l1_hit_rate"] + preview_cols = list( + benchmarks.BENCH_TARGET_INDEX_COLS + + ["kernel_name", "kernel_launch_id", "run"] + + ["input_id_serial", "input_id_parallel"] + + bench_input_cols + + [c + "_serial" for c in benchmarks.SIMULATE_EXECUTION_CONFIG_COLS] + + [c + "_parallel" for c in benchmarks.SIMULATE_EXECUTION_CONFIG_COLS] + + sorted( + [c + "_serial" for c in preview_metric_cols] + + [c + "_parallel" for c in preview_metric_cols] + ) + ) + # print(joined[preview_cols][:4].T) + + group_cols = sorted( + benchmarks.BENCH_TARGET_INDEX_COLS + # + ["input_id_serial"] + + ["input_id_serial", "input_id_parallel"] + + bench_input_cols + + benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS + + [col + "_parallel" for col in benchmarks.SIMULATE_EXECUTION_CONFIG_COLS] + + [col + "_serial" for col in benchmarks.SIMULATE_EXECUTION_CONFIG_COLS] + ) + if verbose: + print("GROUP COLS:") + pprint(group_cols) + # assert "input_id" not in group_cols + # assert "input_id_serial" not in group_cols + + aggregations = { + **{c: "mean" for c in sorted(joined.columns)}, + **{c: agg for c, agg in benchmarks.NON_NUMERIC_COLS.items()}, + **{c + "_parallel": agg for c, agg in benchmarks.NON_NUMERIC_COLS.items()}, + **{c + "_serial": agg for c, agg in benchmarks.NON_NUMERIC_COLS.items()}, + } + aggregations = { + col: agg + for col, agg in aggregations.items() + if col in joined and not col in group_cols + } + # 
print("AGGREGATIONS:") + # pprint(aggregations) + + if set(joined.columns.tolist()) - set(group_cols) != set(aggregations.keys()): + pprint( + (set(joined.columns.tolist()) - set(group_cols)).symmetric_difference( + set(aggregations.keys()) + ) + ) + raise ValueError + + # def add_no_kernel_exec_time(df): + # # print(df[preview_cols].T) + # assert len(df) >= 2, "have no kernel row and at least one kernel for the config" + # valid_kernels = ~df["kernel_name"].isna() + # no_kernel = df[~valid_kernels] + # assert len(no_kernel) == 1 + # num_valid_kernels = valid_kernels.sum() + # assert num_valid_kernels >= 1 + # serial_delta = float(no_kernel["exec_time_sec_serial"].iloc[0]) / num_valid_kernels + # parallel_delta = float(no_kernel["exec_time_sec_parallel"].iloc[0]) / num_valid_kernels + # df.loc[valid_kernels, "exec_time_sec_serial"] += serial_delta + # df.loc[valid_kernels, "exec_time_sec_parallel"] += parallel_delta + # return df + # + # joined = joined.groupby( + # group_cols + ["run"], dropna=False).apply( + # add_no_kernel_exec_time).reset_index(drop=True) + + # # remove non kernel stats + # grouped = joined[~joined["kernel_name"].isna()].groupby(group_cols, dropna=False) + grouped = joined.groupby(group_cols, dropna=False) + + # this is just for checking things + def _inspect(df): + # print(df) + # print(df.columns) + # print(df.index) + configs = df[["input_id_parallel", "input_id_serial"]].drop_duplicates() + if not len(configs) == 1: + print("WARN", configs) + # assert len(configs) == 1 + + if not all_benchmarks: + assert len(df["input_id_serial"].unique()) == 1 + # print("num runs", len(df["run"].unique())) + pass + + grouped[joined.columns].apply(_inspect) + + aggregated = grouped.agg(aggregations, squeeze=False) + + + # speedup + def compute_speedup(df): + # only count speedup for large enough inputs + exec_time_sec_serial = df["exec_time_sec_serial"] + exec_time_sec_parallel = df["exec_time_sec_parallel"] + exec_time_sec_parallel = df[ + 
["exec_time_sec_serial", "exec_time_sec_parallel"] + ].min(axis=1) + # print(df[["benchmark", "target", "input_id_serial", "input_id_parallel", "run", "mean_blocks_per_sm_parallel", "exec_time_sec_serial", "exec_time_sec_parallel"]]) + return metrics.speedup( + baseline=exec_time_sec_serial, values=exec_time_sec_parallel + ).mean() + + if True: + # exec time speedup + aggregated["exec_time_sec_speedup"] = grouped[joined.columns].apply(compute_speedup) + + # cycles error + aggregated["cycles_mape"] = grouped[joined.columns].apply( + lambda df: metrics.mape( + true_values=df["cycles_serial"], values=df["cycles_parallel"] + ) + ) + + # l1 hit rate error + aggregated["l1_hit_rate_mae"] = grouped[joined.columns].apply( + lambda df: metrics.abs_err( + true_values=df["l1_hit_rate_serial"], values=df["l1_hit_rate_parallel"] + ).mean() + ) + + # l2 hit rate error + aggregated["l2_hit_rate_mae"] = grouped[joined.columns].apply( + lambda df: metrics.abs_err( + true_values=df["l2_hit_rate_serial"], values=df["l2_hit_rate_parallel"] + ).mean() + ) + + # dram reads error + aggregated["dram_reads_smape"] = grouped[joined.columns].apply( + lambda df: metrics.smape( + true_values=df["dram_reads_serial"], values=df["dram_reads_parallel"] + ) # .mean() + ) + + # dram writes error + aggregated["dram_writes_smape"] = grouped[joined.columns].apply( + lambda df: metrics.smape( + true_values=df["dram_writes_serial"], values=df["dram_writes_parallel"] + ) # .mean() + ) + + else: + # exec time speedup + aggregated["exec_time_sec_speedup"] = metrics.speedup( + baseline=aggregated["exec_time_sec_serial"], + values=aggregated[ + ["exec_time_sec_serial", "exec_time_sec_parallel"] + ].min(axis=1)) + + # cycles error + aggregated["cycles_mape"] = metrics.mape( + true_values=aggregated["cycles_serial"], values=aggregated["cycles_parallel"] + ) + + # l1 hit rate error + aggregated["l1_hit_rate_mae"] = metrics.abs_err( + true_values=aggregated["l1_hit_rate_serial"], + 
values=aggregated["l1_hit_rate_parallel"] + ) + + + # l2 hit rate error + aggregated["l2_hit_rate_mae"] = metrics.abs_err( + true_values=aggregated["l2_hit_rate_serial"], values=aggregated["l2_hit_rate_parallel"] + ) + + + # dram reads error + aggregated["dram_reads_smape"] = metrics.smape( + true_values=aggregated["dram_reads_serial"], values=aggregated["dram_reads_parallel"] + ) + + + # dram writes error + aggregated["dram_writes_smape"] = metrics.smape( + true_values=aggregated["dram_writes_serial"], values=aggregated["dram_writes_parallel"] + ) + + + # print(aggregated[[ + # "target", + # "benchmark", + # "input_variant", + # "dram_reads_serial", + # "dram_reads_parallel", + # "dram_reads_rel_err", + # "dram_writes_serial", + # "dram_writes_parallel", + # "dram_writes_rel_err", + # ]]) + + aggregated = aggregated.reset_index() + # print( + # aggregated.loc[ + # # 500_000 vectoradd + # aggregated["input_id_serial"] == 210.0, + # preview_cols + # + [ + # "cycles_mape", + # "dram_reads_smape", + # "dram_writes_smape", + # "exec_time_sec_speedup", + # ], + # ][0:4].T.drop_duplicates() + # ) + + # build the table data + assert 8 * benchmarks.BASELINE["num_clusters"] == 224 + + functional_configs: typing.Sequence[typing.Dict[str, typing.Any]] = [ + dict( + input_memory_only=False, + input_num_clusters=benchmarks.BASELINE["num_clusters"], + input_cores_per_cluster=1, + ), + ] + if scale_clusters: + functional_configs += [ + dict( + input_memory_only=False, + input_num_clusters=4 * benchmarks.BASELINE["num_clusters"], + input_cores_per_cluster=1, + ) + ] + else: + functional_configs += [ + dict( + input_memory_only=False, + input_num_clusters=benchmarks.BASELINE["num_clusters"], + input_cores_per_cluster=4, + ) + ] + + selected_benchmarks: typing.Sequence[typing.Dict[str, typing.Any]] = [] + for functional_config in functional_configs: + selected_benchmarks += [ + dict( + name="vectorAdd", + inputs={ + **{"input_dtype": 32, "input_length": 500_000}, + 
**functional_config, + }, + ), + dict( + name="babelstream", + inputs={ + **{"input_size": 102400}, + **functional_config, + }, + ), + dict( + name="transpose", + inputs={ + # **{"input_variant": "naive", "input_dim": 512}, + **{"input_variant": "coalesced", "input_dim": 512}, + **functional_config, + }, + ), + dict( + name="matrixmul", + inputs={ + **{"input_dtype": 32, "input_rows": 512}, + **functional_config, + }, + ), + dict( + name="simple_matrixmul", + inputs={ + **{ + "input_dtype": 32, + "input_m": 512, + "input_n": 32, + "input_p": 512, + }, + **functional_config, + }, + ), + ] + + + table = "" + + # absolute_exec_time = not all_benchmarks + + if all_benchmarks: + for functional_config in functional_configs: + mask_cols = list(functional_config.keys()) + mask_values = list(functional_config.values()) + mask = (aggregated[mask_cols] == mask_values).all(axis=1) + + # print(aggregated.loc[mask, list( + # ["benchmark", "input_id_serial", "input_id_parallel"] + # + ["mean_blocks_per_sm_serial", "mean_blocks_per_sm_parallel"] + # + ["exec_time_sec_serial", "exec_time_sec_parallel", "exec_time_sec_speedup"] + # )]) + + # return + + total_cores = int(aggregated.loc[mask, "total_cores_parallel"].values[0]) + + num_unique_bench_configs = len(aggregated.loc[mask, ["benchmark", "input_id_serial"]].drop_duplicates()) + label = "Average ({} benchmark configurations) @ {} SM's".format( + num_unique_bench_configs, total_cores + ) + if large: + label += " [blocks/SM > 1]" + # label += " [blocks/SM > 1, {} benchmarks]".format(num_unique_bench_configs) + + table += "%\n%\n" + table += ( + r"\rowcolor{gray!10} \multicolumn{6}{c}{\textbf{" + + label + + r"}} \\ \hline" + + "\n" + ) + + print("=> functional config: {}".format(functional_config)) + + # num_bench_configs = num_benchmarks # todo + table_rows: typing.Sequence[ParallelTableRow] = build_parallel_table_rows( + aggregated[mask], + # num_bench_configs=num_bench_configs, + verbose=verbose, + # all_benchmarks=True + ) + 
+ table += "%\n%\n" + + table_rows = sorted(table_rows, key=lambda row: (row.metric, row.threads)) + for row in table_rows: + bold_values = [] + if row.metric == r"exec\\time": + bold_values = [np.amax(row.values())] + # bold_values = [np.amin(row.values())] + # if absolute_exec_time: + # # when exec time is absolute, take minimum + # bold_values = [np.amin(row.values())] + # else: + # # when exec time is speedup, take maximum + # bold_values = [np.amax(row.values())] + if verbose: + print(row.metric, bold_values, row.values()) + table += write_table_row(row, bold_values) + + else: + for bench_config in selected_benchmarks: + bench_inputs: typing.Dict[str, typing.Any] = bench_config["inputs"] + if not all(aggregated["benchmark"] == bench_config["name"]): + # print( + # "SKIP: want {} (have {})".format( + # aggregated["benchmark"][0], bench_config["name"] + # ) + # ) + continue + + print("") + print( + color("==> {} {}".format(bench_config["name"], bench_inputs), fg="cyan") + ) + + mask_cols = ["benchmark"] + list(bench_inputs.keys()) + mask_values = [bench_name] + list(bench_inputs.values()) + + mask = (aggregated[mask_cols] == mask_values).all(axis=1) + # test_df = aggregated.loc[ + # mask, + # benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS + # + bench_input_cols + # + ["mean_blocks_per_sm_parallel"], + # ] + # test_df = test_df.drop_duplicates() + # print(test_df) + # assert len(test_df) == 1 + + table += "%\n%\n" + label = str( + compute_table_row_label(bench_config, aggregated.loc[mask].iloc[0]) + ) + table += ( + r"\rowcolor{gray!10} \multicolumn{6}{c}{\textbf{" + + label + + r"}} \\ \hline" + + "\n" + ) + + # print(aggregated.loc[mask, list( + # ["benchmark", "input_id_serial", "input_id_parallel"] + # + ["mean_blocks_per_sm_serial", "mean_blocks_per_sm_parallel"] + # + ["exec_time_sec_serial", "exec_time_sec_parallel", "exec_time_sec_speedup"] + # )]) + + + # assert len(aggregated.loc[mask, ["target", "benchmark", "input_id_serial"]].drop_duplicates()) == 1 + 
num_unique_bench_configs = len(aggregated.loc[mask, ["benchmark", "input_id_serial"]].drop_duplicates()) + assert num_unique_bench_configs == 1 + table_rows: typing.Sequence[ParallelTableRow] = build_parallel_table_rows( + aggregated[mask], + # num_bench_configs=1, # all_benchmarks=False + verbose=verbose, + ) + + table += "%\n%\n" + + table_rows = sorted(table_rows, key=lambda row: (row.metric, row.threads)) + for row in table_rows: + bold_values = [] + if row.metric == r"exec\\time": + bold_values = [np.amin(row.values())] + # if absolute_exec_time: + # bold_values = [np.amin(row.values())] + # else: + # bold_values = [np.amax(row.values())] + if verbose: + print( + "writing table row {:<30} values={} bold={}".format( + row.metric, row.values(), bold_values + ) + ) + table += write_table_row(row, bold_values) + + # add averaged row + for functional_config in functional_configs: + mask_cols = list(functional_config.keys()) + mask_values = list(functional_config.values()) + mask = (aggregated[mask_cols] == mask_values).all(axis=1) + + total_cores = int(aggregated.loc[mask, "total_cores_parallel"].values[0]) + + if verbose: + print( + color( + "==> AVERAGE for {:<4} SM's {}".format( + total_cores, functional_config + ), + fg="cyan", + ) + ) + + num_unique_bench_configs = len(aggregated.loc[mask, ["benchmark", "input_id_serial"]].drop_duplicates()) + if num_unique_bench_configs == 1: + # does not make sense to average this, we have this in the + # previous section + continue + # label = "Average @ {} SM's".format(total_cores) + # unique_bench_names = [bench_config["name"] for bench_config in selected_benchmarks] + unique_bench_names = sorted( + [ + benchmarks.benchmark_name_human_readable(name) + for name in aggregated.loc[mask, "benchmark"].unique() + ] + ) + label = "Average {} ({} configurations) @ {} SM's".format( + ", ".join(unique_bench_names), + num_unique_bench_configs, + total_cores, + ) + if large: + label += " [blocks/SM > 1]" + # label += " [blocks/SM > 
1, {} benchmarks]".format(num_unique_bench_configs) + + assert "_" not in label + + table += "%\n%\n" + table += ( + r"\rowcolor{gray!10} \multicolumn{6}{c}{\textbf{" + + label + + r"}} \\ \hline" + + "\n" + ) + + # assert num_benchmarks == 1 + # num_configs = len(aggregated.loc[mask, all_input_cols].drop_duplicates()) + table_rows: typing.Sequence[ParallelTableRow] = build_parallel_table_rows( + aggregated[mask], + # num_bench_configs=num_configs, # all_benchmarks=True + verbose=verbose, + ) + table += "%\n%\n" + + table_rows = sorted(table_rows, key=lambda row: (row.metric, row.threads)) + for row in table_rows: + bold_values = [] + if row.metric == r"exec\\time": + # if absolute_exec_time: + # # when exec time is absolute, take minimum + # bold_values = [np.amin(row.values())] + # else: + # # when exec time is speedup, take maximum + bold_values = [np.amax(row.values())] + + # print(row.metric, bold_values, row.values()) + if verbose: + print( + "writing table row {:<30} values={} bold={}".format( + row.metric, row.values(), bold_values + ) + ) + table += write_table_row(row, bold_values) + + clipboard_table = r""" +{\renewcommand{\arraystretch}{1.5}% +\begin{tabularx}{\textwidth}{zs|s|z|zz} +& & \multicolumn{1}{c|}{Serial} & \multicolumn{1}{c|}{Deterministic} & \multicolumn{2}{c}{Nondeterministic} \\ +& & & & \multicolumn{1}{c}{$n=5$} & \multicolumn{1}{c}{$n=10$} \\ \hline +""" + + clipboard_table += table + clipboard_table += r""" +\end{tabularx}} +\end{table} + """ + + if not batch: + print(clipboard_table) + utils.copy_to_clipboard(clipboard_table) + print("copied table to clipboard") + + + caption = r"Average relative speedup and percentage error for serial and parallel simulation using \textsc{gpucachesim} on selected simulation output metrics using $t$ threads." 
+ + tex_code = r""" +\documentclass[preview]{standalone} +""" + tex_code += utils.TEX_PACKAGES + tex_code += r""" +\begin{document} +""" + + tex_code += r""" +\begin{table}[tbh] +\fontsize{8}{10}\selectfont +\footnotesize""" + tex_code += r"\caption{\small " + caption + r"}" + tex_code += r""" +\centering +% \setlength\extrarowheight{2pt} +% \rowcolors{2}{white}{gray!20} +{\renewcommand{\arraystretch}{1.5}% +\begin{tabularx}{\textwidth}{zs|s|z|zz} +& +& \multicolumn{1}{c|}{Serial} +& \multicolumn{1}{c|}{Deterministic} +& \multicolumn{2}{c}{Nondeterministic} \\ +& & & & \multicolumn{1}{c}{$n=5$} & \multicolumn{1}{c}{$n=10$} \\ \hline +""" + tex_code += table + tex_code += r""" +\end{tabularx}} +\end{table} +""" + tex_code += r""" +\end{document} +""" + + filename = "parallel_table" + if all_benchmarks: + filename += "_all" + else: + filename += "_{}".format(bench_name) + if scale_clusters: + filename += "_scaled_clusters" + if large: + filename += "_large" + pdf_output_path = (plot.TABLE_DIR / filename).with_suffix(".pdf") + try: + utils.render_latex(tex_code, output_path=pdf_output_path) + except Exception as e: + print(tex_code) + raise e + print(color("wrote {}".format(pdf_output_path), fg="cyan")) + + if png: + png_output_path = (plot.TABLE_DIR / "png" / filename).with_suffix(".png") + utils.convert_to_png(input_path=pdf_output_path, output_path=png_output_path) + print(color("wrote {}".format(png_output_path), fg="cyan")) diff --git a/gpucachesim/stats/result_table.py b/gpucachesim/stats/result_table.py new file mode 100644 index 00000000..6bd97202 --- /dev/null +++ b/gpucachesim/stats/result_table.py @@ -0,0 +1,626 @@ +import enum +import typing +import itertools +import numpy as np +import pandas as pd +from wasabi import color +from pathvalidate import sanitize_filename + +import gpucachesim.stats.agg +import gpucachesim.stats.metrics as metric_funcs +import gpucachesim.benchmarks as benchmarks +import gpucachesim.utils as utils +import gpucachesim.plot as 
plot + + +class ErrorMetric(enum.Enum): + MAPE = "MAPE" + SMAPE = "SMAPE" + MAE = "MAE" + Correlation = "Corr." + EMALE = "EMALE" + ERMSLE = "ERMSLE" + # RelErr = "Rel err." + + # MAPE = ("mape", "MAPE") + # Correlation = ("corr", "Corr.") + # RelErr = ("rel_err", "Rel err.") + + +# from collections import namedtuple + +# ErrorMetric = namedtuple('ErrorMetric', ['value', 'label', 'column']) + +# class ErrorMetrics(enum.Enum): +# +# @property +# def column(self): +# return self.value.column +# +# yellow = ErrorMetric(1, 'Yellow') +# green = Color(2, 'Green') + + +def result_table(df, bench_name: typing.Optional[str]=None, metrics: typing.Optional[typing.Union[str, typing.List[typing.Optional[str]]]]=None, combined_only=False, verbose=False, batch=False, png=False): + # remove non-kernel results + df = df[~df["kernel_name"].isna()] + + # target benchmark histogram + target_bench_input_count_hist = ( + df[["target", "benchmark", "input_id"]] + .drop_duplicates() + .value_counts(["target", "benchmark"], dropna=False) + .sort_index() + ) + if verbose: + print(target_bench_input_count_hist) + + target_dfs = gpucachesim.stats.agg.split_into_target_dfs(df, per_kernel=False, mean=True) + native_df = target_dfs.native_df + accelsim_df = target_dfs.accelsim_df + serial_gpucachesim_df = target_dfs.serial_gpucachesim_df + serial_gpucachesim_mem_only_df = target_dfs.serial_gpucachesim_mem_only_df + serial_gpucachesim_exec_driven_df = target_dfs.serial_gpucachesim_exec_driven_df + + class Metric(typing.TypedDict): + label: str + is_percent: bool + error_metrics: typing.Sequence[typing.Tuple[str, ErrorMetric]] + + benches = sorted(df["benchmark"].unique().tolist()) + all_metrics = [ + Metric( + label="DRAM reads", + is_percent=False, + error_metrics=[ + ("dram_reads", ErrorMetric.EMALE), + ("dram_reads_percent", ErrorMetric.MAPE), + ("dram_reads", ErrorMetric.Correlation), + ], + ), + Metric( + label="DRAM writes", + is_percent=False, + error_metrics=[ + ("dram_writes", 
ErrorMetric.EMALE), + ("dram_writes_percent", ErrorMetric.MAPE), + ("dram_writes", ErrorMetric.Correlation), + ], + ), + Metric( + label="L1 Accesses", + is_percent=False, + error_metrics=[ + ("l1_accesses", ErrorMetric.EMALE), + ("l1_accesses", ErrorMetric.MAPE), + ("l1_accesses", ErrorMetric.Correlation), + ], + ), + Metric( + label="L2 Accesses", + is_percent=False, + error_metrics=[ + ("l2_accesses", ErrorMetric.EMALE), + ("l2_accesses", ErrorMetric.MAPE), + ("l2_accesses", ErrorMetric.Correlation), + ], + ), + Metric( + label="L2 reads", + is_percent=False, + error_metrics=[ + ("l2_reads", ErrorMetric.EMALE), + ("l2_reads", ErrorMetric.MAPE), + ("l2_reads", ErrorMetric.Correlation), + ], + ), + Metric( + label="L2 writes", + is_percent=False, + error_metrics=[ + ("l2_writes", ErrorMetric.EMALE), + ("l2_writes", ErrorMetric.MAPE), + ("l2_writes", ErrorMetric.Correlation), + ], + ), + Metric( + label="L1D hitrate", + is_percent=True, + error_metrics=[ + ("l1_global_hit_rate", ErrorMetric.EMALE), + ("l1_global_hit_rate", ErrorMetric.MAE), + ("l1_global_hit_rate", ErrorMetric.Correlation), + ], + ), + Metric( + label="L2D hitrate", + is_percent=True, + error_metrics=[ + ("l2_hit_rate", ErrorMetric.EMALE), + ("l2_hit_rate", ErrorMetric.MAE), + ("l2_hit_rate", ErrorMetric.Correlation), + ], + ), + Metric( + label="L2D read hitrate", + is_percent=True, + error_metrics=[ + ("l2_read_hit_rate", ErrorMetric.EMALE), + ("l2_read_hit_rate", ErrorMetric.MAE), + ("l2_read_hit_rate", ErrorMetric.Correlation), + ], + ), + Metric( + label="L2D write hitrate", + is_percent=True, + error_metrics=[ + ("l2_write_hit_rate", ErrorMetric.EMALE), + ("l2_write_hit_rate", ErrorMetric.MAE), + ("l2_write_hit_rate", ErrorMetric.Correlation), + ], + ), + Metric( + label="Cycles", + is_percent=False, + error_metrics=[ + # ("cycles", ErrorMetric.RelErr), + ("cycles", ErrorMetric.EMALE), + ("cycles", ErrorMetric.ERMSLE), + ("cycles", ErrorMetric.SMAPE), + ("cycles", ErrorMetric.MAPE), + 
("cycles", ErrorMetric.Correlation), + ], + ), + ] + + if metrics is None: + metrics_keys = [] + elif isinstance(metrics, str): + metrics_keys = [metrics] + elif isinstance(metrics, list): + metrics_keys = metrics + else: + raise ValueError("metrics must be either a string or list of strings, have {}".format(metrics)) + + metrics_keys = [metric.replace(" ", "").lower() for metric in metrics_keys if metric is not None] + + if len(metrics_keys) == 0: + # only show cycles by default + selected_metrics = [all_metrics[-1]] + else: + selected_metrics = [ + m for m in all_metrics if m["label"].replace(" ", "").lower() in metrics_keys + ] + if len(selected_metrics) == 0: + raise ValueError( + "invalid metrics {} ({}), have {}", + metrics, + metrics_keys, + [m["label"].replace(" ", "").lower() for m in all_metrics], + ) + + if verbose: + print("\n") + print( + "computing {} metrics: {} for {} benches: {}".format( + len(selected_metrics), + [m["label"] for m in selected_metrics], + len(benches), + benches, + ) + ) + + # dtypes = { + # **{col: "float64" for col in native_df.columns}, + # **{col: "object" for col in benchmarks.NON_NUMERIC_COLS.keys()}, + # } + # dtypes = {col: dtype for col, dtype in dtypes.items() if col in native_df} + # native_df = native_df.astype(dtypes) + + dtypes = dict() + sim_targets = { + "accelsim": accelsim_df.astype(dtypes), + "gpucachesim": serial_gpucachesim_df.astype(dtypes), + "gpucachesim_mem_only": serial_gpucachesim_mem_only_df.astype(dtypes), + "gpucachesim_exec_driven": serial_gpucachesim_exec_driven_df.astype(dtypes), + } + + for target, sim_df in sim_targets.items(): + if verbose: + print("computing =>", target) + # print(sim_df[benchmarks.PREVIEW_COLS][:4].T) + join_cols = list( + # we do NOT join based on target + ["benchmark", "kernel_launch_id"] + + list(benchmarks.ALL_BENCHMARK_INPUT_COLS) + # we do NOT join based on input_memory_only + + ["input_num_clusters", "input_cores_per_cluster"], + ) + join_cols = [col for col in join_cols 
if col in df] + # pprint(join_cols) + + missing_df = ( + native_df[join_cols] + .merge( + sim_df[join_cols], + how="left", + indicator=True, + ) + .loc[lambda x: x["_merge"] != "both"] + ) + if len(missing_df) > 0: + # if target == "_gpucachesim_parallel": + # # temp: ignore for now + # pass + if target == "gpucachesim_exec_driven": + # we do not have an exec driven version of babelstream + missing_exec_driven_benches = sorted(missing_df["benchmark"].unique().tolist()) + if missing_exec_driven_benches != ["babelstream"]: + print("MISSING {}".format(missing_df.shape)) + print(missing_df) + raise ValueError( + "missing exec driven {} but should only miss babelstream".format(missing_exec_driven_benches) + ) + else: + print("MISSING {}".format(missing_df.shape)) + print(missing_df) + assert len(missing_df) == 0 + + joined_df = native_df.merge( + sim_df, + on=join_cols, + how="left", + suffixes=(None, "_" + target), + ) + assert joined_df.shape[0] == native_df.shape[0] + if len(joined_df) == 0: + raise ValueError("joined dataframe is empty") + + native_df = joined_df + # break + + for target in list(sim_targets.keys()) + [""]: + suffix = ("_" + target) if target != "" else "" + native_df["dram_reads_percent" + suffix] = native_df["dram_reads" + suffix].fillna(0.0) + scale = native_df[["num_global_loads", "num_global_stores"]].max(axis=1) + 0.00001 + native_df["dram_reads_percent" + suffix] /= scale + native_df["dram_writes_percent" + suffix] = native_df["dram_writes" + suffix].fillna(0.0) + native_df["dram_writes_percent" + suffix] /= scale + assert (native_df["dram_writes_percent" + suffix] <= 1.0).all() + assert (native_df["dram_reads_percent" + suffix] <= 1.0).all() + + assert all([col in native_df for col, _ in utils.flatten([m["error_metrics"] for m in selected_metrics])]) + + # preview_cols = [ + # "benchmark", + # "input_id", + # "num_global_loads", + # "num_global_stores", + # ] + [ + # col + suffix + # for col, suffix in itertools.product( + # # ["cycles"], + # 
# ["dram_writes", "dram_writes_percent"], + # # ["dram_reads", "dram_reads_percent"], + # ["l1_accesses"], + # # [""] + list(sim_targets.keys()) + # ["", "_accelsim", "_gpucachesim"], + # ) + # ] + # print(native_df[preview_cols]) + + if verbose: + for metric in selected_metrics: + metric_cols = sorted(list(set([metric_col for metric_col, _ in metric["error_metrics"]]))) + print("==> PREVIEW: {}".format(metric_cols)) + preview_cols = [ + "benchmark", + "input_id", + # "num_global_loads", + # "num_global_stores", + ] + [ + col + "_" + target + for col, target in itertools.product( + metric_cols, + [""] + list(sim_targets.keys()), + # ["", "_accelsim", "_gpucachesim"], + ) + ] + print(native_df[preview_cols]) + + + if bench_name is None and combined_only: + selected_benches = [None] + elif bench_name is None: + selected_benches = benches + [None] + else: + selected_benches = benches + + table = "" + for bench in selected_benches: + if bench is None: + header_label = "Combined" + else: + header_label = benchmarks.benchmark_name_human_readable(bench) + + table += r"\rowcolor{gray!10}" + table += r"\multicolumn{6}{c}{\textbf{" + header_label + r"}} \\" + if bench is None: + table += r"\hline \hline" + else: + table += r"\hline" + table += "\n" + + for metric in selected_metrics: + if verbose: + print(bench, metric["label"]) + + if bench is not None: + bench_df = native_df[native_df["benchmark"] == bench] + else: + bench_df = native_df + # continue + + table += r"\multirow{" + str(len(metric["error_metrics"])) + "}{*}{" + table += " ".join(str(metric["label"]).split("_")) + table += "} \n" + + for metric_col, error_metric in metric["error_metrics"]: + preview_cols = ["benchmark"] + [ + col + "_" + target + for col, target in itertools.product([metric_col], [""] + list(sim_targets.keys())) + ] + + bench_df = bench_df.copy() + if bench is not None and verbose: + print(bench_df[preview_cols + benchmarks.BENCHMARK_INPUT_COLS[bench]].fillna(0.0)) + print(bench_df.shape) + + 
error_values: pd.DataFrame + + metric_is_percent = metric["is_percent"] + value_scale = 100.0 if metric_is_percent else 1.0 + + match error_metric: + case ErrorMetric.Correlation: + error_values = [] + for target in sim_targets.keys(): + true_values = bench_df[metric_col] * value_scale + values = bench_df[metric_col + "_" + target] * value_scale + atol = 1.0 if metric_is_percent else 0.1 + error = metric_funcs.correlation(true_values=true_values, values=values, atol=atol) + bench_df[metric_col + "_" + error_metric.name.lower() + target] = error + error_values.append(error) + error_values = pd.DataFrame(error_values) + error_values = error_values.mean(axis=1) + + # case ErrorMetric.RelErr: + # error_values = [] + # for suffix in sim_targets.keys(): + # true_values=bench_df[metric_col] + # values=bench_df[metric_col + suffix] + # error = rel_err(true_values=true_values, values=values) + # bench_df[metric_col + "_" + error_metric.name.lower() + suffix] = error + # error_values.append(error) + # error_values = pd.DataFrame(error_values) + # error_values = error_values.mean(axis=1) + # # error_values *= 100.0 + + case ErrorMetric.EMALE: + error_values = [] + for target in sim_targets.keys(): + true_values = bench_df[metric_col] * value_scale + values = bench_df[metric_col + "_" + target] * value_scale + error = metric_funcs.emale(true_values=true_values, values=values) + bench_df[metric_col + "_" + error_metric.name.lower() + "_" + target] = error + error_values.append(error) + error_values = pd.DataFrame(error_values) + error_values = error_values.mean(axis=1) + + case ErrorMetric.ERMSLE: + error_values = [] + for target in sim_targets.keys(): + true_values = bench_df[metric_col] * value_scale + values = bench_df[metric_col + "_" + target] * value_scale + error = metric_funcs.ermsle(true_values=true_values, values=values) + bench_df[metric_col + "_" + error_metric.name.lower() + "_" + target] = error + error_values.append(error) + error_values = 
pd.DataFrame(error_values) + error_values = error_values.mean(axis=1) + + case ErrorMetric.MAE: + error_values = [] + for target in sim_targets.keys(): + true_values = bench_df[metric_col] * value_scale + values = bench_df[metric_col + "_" + target] * value_scale + error = metric_funcs.abs_err(true_values=true_values, values=values) + bench_df[metric_col + "_" + error_metric.name.lower() + "_" + target] = error + error_values.append(error) + error_values = pd.DataFrame(error_values) + error_values = error_values.mean(axis=1) + + case ErrorMetric.SMAPE: + error_values = [] + for target in sim_targets.keys(): + true_values = bench_df[metric_col] * value_scale + values = bench_df[metric_col + "_" + target] * value_scale + error = metric_funcs.smape(true_values=true_values, values=values) + bench_df[metric_col + "_" + error_metric.name.lower() + "_" + target] = error + error_values.append(error) + error_values = pd.DataFrame(error_values) + error_values *= 100.0 + error_values = error_values.mean(axis=1) + + case ErrorMetric.MAPE: + error_values = [] + for target in sim_targets.keys(): + true_values = bench_df[metric_col] * value_scale + values = bench_df[metric_col + "_" + target] * value_scale + error = metric_funcs.mape(true_values=true_values, values=values) + bench_df[metric_col + "_" + error_metric.name.lower() + "_" + target] = error + error_values.append(error) + error_values = pd.DataFrame(error_values) + error_values *= 100.0 + error_values = error_values.mean(axis=1) + # error_values = error_values.aggregate(scipy.stats.gmean, axis=1) + # .apply(np.exp) + # error_values = pd.DataFrame([ + # abs_err( + # true_values=bench_df[metric_col], + # values=bench_df[metric_col + suffix] + # ) for suffix in sim_targets.keys() + # ]) + # keys = [ + # metric_col + "_" + error_metric.name.lower() + suffix + # for suffix in sim_targets.keys() + # ] + # # print(keys) + # print(error_values.shape) + # bench_df[keys] = error_values.to_numpy().ravel() + # error_values = 
error_values.mean(axis=1) + case _: + raise ValueError("unknown error metric {}".format(error_metric.name)) + + # assert isinstance(error_values, (np.ndarray, pd.Series)) + for col, target in enumerate(sim_targets.keys()): + valid = not np.isnan(bench_df[metric_col + "_" + target]).all() + if not valid: + error_values[col] = np.nan + + table += r" & {} ".format(error_metric.value) + if verbose: + print(error_metric.name) + print(error_values) + for value in error_values: + table += " & " + if np.isnan(value): + continue + match error_metric: + case ErrorMetric.Correlation: + if value == np.nanmax(error_values): + table += r"\boldmath" + table += "${:5.3f}$".format(value) + # case ErrorMetric.RelErr: + # if value == np.nanmin(error_values): + # table += r"\boldmath" + # table += "${:5.2f}\\%$".format(value) + # case ErrorMetric.MALE: + # if value == np.nanmin(error_values): + # table += r"\boldmath" + # table += "${}\\%$".format( + # plot.human_format_thousands(value) + # ) + # case ErrorMetric.SMAPE: + # if value == np.nanmin(error_values): + # table += r"\boldmath" + # table += "${}\\%$".format( + # plot.human_format_thousands(value) + # ) + case ErrorMetric.SMAPE | ErrorMetric.MAPE: + if value == np.nanmin(error_values): + table += r"\boldmath" + table += "${}\\%$".format(plot.human_format_thousands(value)) + case ErrorMetric.EMALE | ErrorMetric.ERMSLE | ErrorMetric.MAE: + if value == np.nanmin(error_values): + table += r"\boldmath" + if metric_is_percent: + table += "${:5.2f}\\%$".format(value) + else: + table += "${}$".format(plot.human_format_thousands(value)) + + table += r"\\" + "\n" + + # if not accelsim_valid: + # metric_row[0] = np.nan + # if not gpucachesim_valid: + # metric_row[1] = np.nan + # if not gpucachesim_mem_only_valid: + # metric_row[2] = np.nan + # if not gpucachesim_exec_valid: + # metric_row[3] = np.nan + + if bench is not None and verbose: + print( + bench_df[ + # + [sim + "_rel_err" for sim in ["accelsim", "gpucachesim"]] + # + [sim + 
"_rmse" for sim in ["accelsim", "gpucachesim"]] + preview_cols + + [ + metric_col + "_" + error_metric.name.lower() + "_" + target + for target in ["accelsim", "gpucachesim"] + ] + # + [sim + "_rpd" for sim in ["accelsim", "gpucachesim"]] + ].fillna(0.0) + ) + + # if bench is not None: + table += r" \hline" + table += "\n" + + table += "%\n%\n" + + if not batch: + print("") + print(table) + utils.copy_to_clipboard(table) + print("copied table to clipboard") + + tex_code = r""" +\documentclass[preview]{standalone} +""" + tex_code += utils.TEX_PACKAGES + tex_code += r""" +\begin{document} +""" + + tex_code += r""" +\begin{table}[htbp] +\fontsize{8}{10}\selectfont +\footnotesize +""" + caption = "Results" + tex_code += r"\caption{\small " + caption + "}" + tex_code += r""" +\centering +% \setlength\extrarowheight{2pt} +% \rowcolors{2}{white}{gray!20} +{\renewcommand{\arraystretch}{1.5}% +\begin{tabularx}{\textwidth}{ss|z|z|z|z} +& & \shortstack[t]{\textsc{AccelSim}} + & \shortstack[t]{\textsc{gpucachesim}} + & \shortstack[c]{\textsc{gpucachesim}\\\textit{(memory only)}} + & \shortstack[c]{\textsc{gpucachesim}\\\textit{(trace reconstr.)}} \\ +\hline +""" + tex_code += table + tex_code += r""" +% +\end{tabularx}} +\end{table} +""" + tex_code += r""" +\end{document} +""" + + filename = "result_table" + if bench_name is None: + filename += "_all" + else: + filename += "_{}".format(bench_name) + if combined_only: + filename += "_combined_only" + filename += "_{}".format("_".join([metric["label"].lower().replace(" ", "_") for metric in selected_metrics])) + filename = sanitize_filename(filename) + pdf_output_path = (plot.TABLE_DIR / filename).with_suffix(".pdf") + try: + utils.render_latex(tex_code, output_path=pdf_output_path) + pass + except Exception as e: + print(tex_code) + raise e + print(color("wrote {}".format(pdf_output_path), fg="cyan")) + + if png: + png_output_path = (plot.TABLE_DIR / "png" / filename).with_suffix(".png") + 
utils.convert_to_png(input_path=pdf_output_path, output_path=png_output_path) + print(color("wrote {}".format(png_output_path), fg="cyan")) diff --git a/gpucachesim/stats/speed_table.py b/gpucachesim/stats/speed_table.py new file mode 100644 index 00000000..03c772d0 --- /dev/null +++ b/gpucachesim/stats/speed_table.py @@ -0,0 +1,361 @@ +import copy +import numpy as np +import pandas as pd +import itertools +from pprint import pprint +from wasabi import color + +import gpucachesim.stats.agg +import gpucachesim.plot as plot +import gpucachesim.stats.metrics as metrics +import gpucachesim.benchmarks as benchmarks +import gpucachesim.utils as utils + + +def choose_fastest_parallel_implementation(df) -> pd.DataFrame: + bench_input_cols = copy.deepcopy(list(benchmarks.ALL_BENCHMARK_INPUT_COLS)) + # note, we do NOT group by SIMULATE_EXECUTION_CONFIG_COLS or SIMULATE_INPUT_COLS. + # this means we do NOT group on input_mode, input_run_ahead, or input_threads + functinoal_input_cols = copy.deepcopy(benchmarks.SIMULATE_FUNCTIONAL_CONFIG_COLS) + input_config_group_cols = ["target", "benchmark"] + functinoal_input_cols + bench_input_cols + input_config_group_cols = [col for col in input_config_group_cols if col in df] + + group_cols = input_config_group_cols + ["run"] + min_exec_times = df.groupby(group_cols, dropna=False)["exec_time_sec"].transform("min") + df = df[df["exec_time_sec"] == min_exec_times] + return df + + +def speed_table(df, bench_name, include_mean_time=False, verbose=False, batch=False, png=False): + # remove non-kernel results + no_kernel_mask = df["kernel_name"].isna() + selected_df = df[~no_kernel_mask] + + # print(selected_df.loc[ + # (selected_df["target"] == Target.Simulate.value) + # & (selected_df["input_id"] == 210), + # benchmarks.PREVIEW_COLS + ["cycles", "exec_time_sec"]].T) + + # print(selected_df.loc[ + # (selected_df["target"] == Target.AccelsimSimulate.value) + # & (selected_df["input_id"] == 3), + # benchmarks.PREVIEW_COLS + ["cycles", 
"exec_time_sec"]].T) + + target_dfs = gpucachesim.stats.agg.split_into_target_dfs(selected_df, per_kernel=False, mean=True) + + # print(target_dfs.serial_gpucachesim_df.loc[ + # target_dfs.serial_gpucachesim_df["input_id"] == 210, + # benchmarks.PREVIEW_COLS + ["cycles", "exec_time_sec"]].T) + + # print(target_dfs.accelsim_df.loc[ + # target_dfs.accelsim_df["input_id"] == 3, + # benchmarks.PREVIEW_COLS + ["cycles", "exec_time_sec"]].T) + + native_df = target_dfs.native_df + accelsim_df = target_dfs.accelsim_df + serial_gpucachesim_df = target_dfs.serial_gpucachesim_df + serial_gpucachesim_mem_only_df = target_dfs.serial_gpucachesim_mem_only_df + serial_gpucachesim_exec_driven_df = target_dfs.serial_gpucachesim_exec_driven_df + parallel_gpucachesim_df = choose_fastest_parallel_implementation(target_dfs.parallel_gpucachesim_df) + print("{:>50}\t{}".format("fastest parallel gpucachesim", parallel_gpucachesim_df.shape)) + + benches = sorted(selected_df["benchmark"].unique().tolist()) + + # dtypes = { + # **{col: "float64" for col in native_df.columns}, + # **{col: "object" for col in benchmarks.NON_NUMERIC_COLS.keys()}, + # } + # dtypes = {col: dtype for col, dtype in dtypes.items() if col in native_df} + # native_df = native_df.astype(dtypes) + + dtypes = dict() + sim_targets = { + "accelsim": accelsim_df.astype(dtypes), + "gpucachesim": serial_gpucachesim_df.astype(dtypes), + "gpucachesim_mem_only": serial_gpucachesim_mem_only_df.astype(dtypes), + "gpucachesim_exec_driven": serial_gpucachesim_exec_driven_df.astype(dtypes), + "gpucachesim_parallel": parallel_gpucachesim_df.astype(dtypes), + } + + if verbose: + print("\n") + + for target, sim_df in sim_targets.items(): + if verbose: + print("computing =>", target) + # print(sim_df[benchmarks.PREVIEW_COLS][:4].T) + join_cols = list( + # we do NOT join based on target + ["benchmark", "kernel_launch_id"] + + list(benchmarks.ALL_BENCHMARK_INPUT_COLS) + # we do NOT join based on input_memory_only + + ["input_num_clusters", 
"input_cores_per_cluster"], + ) + join_cols = [col for col in join_cols if col in selected_df] + # pprint(join_cols) + + missing_df = ( + native_df[join_cols] + .merge( + sim_df[join_cols], + how="left", + indicator=True, + ) + .loc[lambda x: x["_merge"] != "both"] + ) + if len(missing_df) > 0: + if target == "gpucachesim_parallel": + # temp: ignore for now + pass + elif target == "gpucachesim_exec_driven": + # we do not have an exec driven version of babelstream + missing_exec_driven_benches = sorted(missing_df["benchmark"].unique().tolist()) + if missing_exec_driven_benches != ["babelstream"]: + print("MISSING {}".format(missing_df.shape)) + print(missing_df) + raise ValueError( + "missing exec driven {} but should only miss babelstream".format(missing_exec_driven_benches) + ) + else: + print("MISSING {}".format(missing_df.shape)) + print(missing_df) + assert len(missing_df) == 0 + + joined_df = native_df.merge( + sim_df, + on=join_cols, + how="left", + suffixes=(None, "_" + target), + ) + assert joined_df.shape[0] == native_df.shape[0] + if len(joined_df) == 0: + raise ValueError("joined dataframe is empty") + + native_df = joined_df + + native_df["exec_time_nsec"] = native_df["exec_time_sec"] * 1e9 + # preview_metrics = ["cycles", "instructions", "exec_time_sec", "input_id"] + preview_metrics = ["input_id", "kernel_name", "exec_time_sec"] + preview_cols = ["benchmark", "exec_time_nsec"] + [ + col + "_" + target for col, target in itertools.product(preview_metrics, [""] + list(sim_targets.keys())) + ] + + all_slowdowns_over_native = [] + + table = "" + for bench in benches + [None]: + if verbose: + print(bench) + if bench is None: + header_label = "Combined" + else: + header_label = benchmarks.benchmark_name_human_readable(bench) + + table += r"\rowcolor{gray!10}" + table += r"\multicolumn{6}{c}{\textbf{" + header_label + r"}} \\" + if bench is None: + table += r"\hline \hline" + else: + table += r"\hline" + table += "\n" + + # for metric in metrics: + if bench 
is not None: + bench_df = native_df[native_df["benchmark"] == bench] + else: + bench_df = native_df + + bench_df = bench_df.copy() + if verbose: + print(bench_df[preview_cols + benchmarks.BENCHMARK_INPUT_COLS[bench]]) + print(bench_df.shape) + + table += r"Slowdown" + slowdowns_over_native = [ + metrics.slowdown( + baseline=bench_df["exec_time_sec"], + values=bench_df["exec_time_sec_accelsim"], + ), + metrics.slowdown( + baseline=bench_df["exec_time_sec"], + values=bench_df["exec_time_sec_gpucachesim"], + ), + metrics.slowdown( + baseline=bench_df["exec_time_sec"], + values=bench_df["exec_time_sec_gpucachesim_mem_only"], + ), + metrics.slowdown( + baseline=bench_df["exec_time_sec"], + values=bench_df["exec_time_sec_gpucachesim_exec_driven"], + ), + metrics.slowdown( + baseline=bench_df["exec_time_sec"], + values=bench_df["exec_time_sec_gpucachesim_parallel"], + ), + ] + assert all([len(s) == len(slowdowns_over_native[0]) for s in slowdowns_over_native]) + + if bench is None: + slowdowns_over_native = np.nanmean(slowdowns_over_native, axis=1) + else: + slowdowns_over_native = np.mean(slowdowns_over_native, axis=1) + + all_slowdowns_over_native.append(slowdowns_over_native) + + for slowdown_value in slowdowns_over_native: + table += " & " + if np.isnan(slowdown_value): + continue + bold = np.isfinite(slowdown_value) and slowdown_value == np.nanmin(slowdowns_over_native) + if bold: + table += r"\boldmath" + table += "${}$".format(plot.human_format_thousands(slowdown_value)) + table += r"\\" + "\n" + + table += r"KIPS" + native_kilo_instructions = bench_df["instructions"] / 1000.0 + kips = np.array( + [ + native_kilo_instructions / bench_df["exec_time_sec_accelsim"], + native_kilo_instructions / bench_df["exec_time_sec_gpucachesim"], + (bench_df["instructions_gpucachesim_mem_only"] / 1000.0) + / bench_df["exec_time_sec_gpucachesim_mem_only"], + (bench_df["instructions_gpucachesim_exec_driven"] / 1000.0) + / bench_df["exec_time_sec_gpucachesim_exec_driven"], + 
native_kilo_instructions / bench_df["exec_time_sec_gpucachesim_parallel"], + ] + ) + + # print("kips:") + # print(kips) + if bench is None: + kips = np.nanmean(kips, axis=1) + else: + kips = np.mean(kips, axis=1) + for kips_value in kips: + table += " & " + if np.isnan(kips_value): + continue + bold = np.isfinite(kips_value) and kips_value == np.nanmax(kips) + if bold: + table += r"\boldmath" + table += "${}$".format(plot.human_format_thousands(kips_value)) + + if include_mean_time: + table += r"\\" + "\n" + table += r"mean time" + mean_time = np.array( + [ + bench_df["exec_time_sec_accelsim"], + bench_df["exec_time_sec_gpucachesim"], + bench_df["exec_time_sec_gpucachesim_mem_only"], + bench_df["exec_time_sec_gpucachesim_exec_driven"], + bench_df["exec_time_sec_gpucachesim_parallel"], + ] + ) + if bench is None: + mean_time = np.nanmean(mean_time, axis=1) + else: + mean_time = np.mean(mean_time, axis=1) + for mean_time_value in mean_time: + table += " & " + if np.isnan(mean_time_value): + continue + bold = np.isfinite(mean_time_value) and mean_time_value == np.nanmin(mean_time) + if bold: + table += r"\boldmath" + table += "${:5.1f}s$".format(mean_time_value) + # table += r"\\" + "\n" + + table += r"\\" + # if bench is not None: + table += r" \hline" + table += "\n" + table += "% \n" + + table += "%\n%\n" + + all_slowdowns_over_native = pd.DataFrame( + np.stack(all_slowdowns_over_native, axis=0), + columns=list(sim_targets.keys()), + ) + + speedup_over_accel = ( + all_slowdowns_over_native["accelsim"].iloc[-1] / all_slowdowns_over_native["gpucachesim_parallel"].iloc[-1] + ) + print( + color( + "Mean speedup over accelsim: {:>6.3f}x".format(speedup_over_accel), + fg="green", + ) + ) + + if not batch: + print("") + print(table) + utils.copy_to_clipboard(table) + print("copied table to clipboard") + + tex_code = r""" +\documentclass[preview]{standalone} +""" + tex_code += utils.TEX_PACKAGES + tex_code += r""" +\begin{document} +""" + + # caption = r"Average relative 
speedup and percentage error for serial and parallel simulation using \textsc{gpucachesim} on selected simulation output metrics using $t$ threads." + caption = r""" +Simulation speed for different \textsc{gpucachesim} modes and the +popular \textsc{AccelSim} simulator per benchmark. +Measured are relative slowdown over native execution on the +NVIDIA TitanX (Pascal) and absolute simulation rate in kilo +instructions per second (KIPS).""" + + tex_code += r""" +\begin{table}[htbp] +\fontsize{8}{10}\selectfont +\footnotesize +""" + tex_code += r"\caption{\small" + caption + "}" + tex_code += r""" +\centering +% \setlength\extrarowheight{2pt} +% \rowcolors{2}{white}{gray!20} +{\renewcommand{\arraystretch}{1.5}% +\begin{tabularx}{\textwidth}{zz|z|z|z|z} +% Native +& \textsc{AccelSim} +& \shortstack[c]{\textsc{gpucachesim}\\(serial)} +& \shortstack[c]{\textsc{gpucachesim}\\(mem-only)} +& \shortstack[c]{\textsc{gpucachesim}\\(trace recon.)} +& \shortstack[c]{\textsc{gpucachesim}\\(parallel)} \\ \hline +% +""" + tex_code += table + tex_code += r""" +% +\end{tabularx}} +\end{table} +""" + tex_code += r""" +\end{document} +""" + + filename = "speed_table" + if bench_name is None: + filename += "_all" + else: + filename += "_{}".format(bench_name) + pdf_output_path = (plot.TABLE_DIR / filename).with_suffix(".pdf") + try: + utils.render_latex(tex_code, output_path=pdf_output_path) + except Exception as e: + print(tex_code) + raise e + print(color("wrote {}".format(pdf_output_path), fg="cyan")) + + if png: + png_output_path = (plot.TABLE_DIR / "png" / filename).with_suffix(".png") + utils.convert_to_png(input_path=pdf_output_path, output_path=png_output_path) + print(color("wrote {}".format(png_output_path), fg="cyan")) diff --git a/gpucachesim/stats/stats.py b/gpucachesim/stats/stats.py index 3c360868..c7fb98b1 100644 --- a/gpucachesim/stats/stats.py +++ b/gpucachesim/stats/stats.py @@ -272,9 +272,9 @@ def compute_result_df(self): self.result_df["kernel_name"] = 
self.result_df[ "kernel_function_signature" ].apply( - lambda sig: np.nan - if pd.isnull(sig) - else common.function_name_from_signature(sig) + lambda sig: ( + np.nan if pd.isnull(sig) else common.function_name_from_signature(sig) + ) ) def _compute_l2_read_hit_rate(self): diff --git a/gpucachesim/utils.py b/gpucachesim/utils.py index e5c9ac4c..09c414e9 100644 --- a/gpucachesim/utils.py +++ b/gpucachesim/utils.py @@ -1,6 +1,17 @@ +import typing +import click import platform import pyperclip import numpy as np +from pathlib import Path +from os import PathLike + + +@click.group() +# @click.pass_context +def main(): + # ctx.ensure_object(dict) + pass def flatten(l): @@ -46,3 +57,228 @@ def copy_to_clipboard(value): pyperclip.copy(value) except pyperclip.PyperclipException as e: print("copy to clipboard failed: {}".format(e)) + + +TEX_PACKAGES = r""" +\usepackage{algorithm} +\usepackage{algpseudocode} +\usepackage[table]{xcolor} + +\usepackage[utf8]{inputenc} % Required for inputting international characters +\usepackage[scaled]{helvet} +\usepackage[T1]{fontenc} % Output font encoding for international characters +\renewcommand\familydefault{\sfdefault} + +\usepackage{graphicx} +\usepackage{tabulary} +\usepackage{tabularx} +\usepackage{listings} + +% \usepackage{lstlinebgrd} +% listings-rust} +\usepackage[norndcorners,customcolors]{hf-tikz} +\hfsetbordercolor{yellow} +\hfsetfillcolor{yellow} + +\usepackage{subcaption} +\usepackage{multirow} +\usepackage{multicol} +\usepackage{makecell} +\usepackage{graphicx} +\usepackage{amsmath} +\usepackage{amsfonts} +\usepackage{layouts} + +% font for checkmark ding symbols +\usepackage{pifont} +\usepackage{makecell} + +% for command line bash code +\usepackage{minted} + +% options in square brackets for enumerate and itemize +\usepackage{enumitem} + +% fix table row colors +\newcounter{tblerows} +\expandafter\let\csname c@tblerows\endcsname\rownum + +% make big, small and tiny column width for tabularx 
+\newcolumntype{b}{>{\raggedright\arraybackslash}X} +% \newcolumntype{s}{>{\raggedright\arraybackslash\hsize=.6\hsize}X} +% \newcolumntype{d}{>{\raggedright\arraybackslash\hsize=.35\hsize}X} +% \newcolumntype{t}{>{\raggedright\arraybackslash\hsize=.22\hsize}X} +% centered version of t +% \newcolumntype{u}{>{\centering\arraybackslash\hsize=.22\hsize}X} +% right aligned version of s (small) +\newcolumntype{z}{>{\raggedleft\arraybackslash\hsize=.6\hsize}X} +% right aligned version of s (super small) +\newcolumntype{s}{>{\raggedleft\arraybackslash\hsize=.3\hsize}X} +% right aligned version of s (large) +\newcolumntype{Z}{>{\raggedleft\arraybackslash}X} +% centered aligned version of s +\newcolumntype{k}{>{\centering\arraybackslash\hsize=.6\hsize}X} + +\newcommand{\cmark}{\ding{51}} +\newcommand{\xmark}{\ding{55}} +\newcommand*\xor{\oplus} + +\newcommand\todo[1]{\textcolor{red}{TODO: #1}} + +% \newcommand\descitem[1]{\item{\bfseries #1}\\} +\newcommand\descitem[1]{\item{\bfseries #1}} +""" + + +def render_latex(tex_code: str, output_path: PathLike, crop=True): + import gpucachesim.cmd as cmd_utils + import tempfile + + output_path = Path(output_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + + with tempfile.TemporaryDirectory() as temp_dir: + temp_dir = Path(temp_dir) + + # write code to temp dir + tex_input_file = temp_dir / "code.tex" + with open(tex_input_file, "w") as f: + f.write(tex_code) + + # run pdflatex + cmd = [ + "pdflatex", + "-output-directory", + str(temp_dir), + "-interaction", + "nonstopmode", + "-shell-escape", + str(tex_input_file), + ] + try: + retcode, stdout, stderr, _ = cmd_utils.run_cmd( + cmd, + timeout_sec=60, + cwd=temp_dir, + ) + except cmd_utils.ExecStatusError as e: + print("stderr:") + print(e.stderr) + print("stdout:") + print(e.stdout) + tex_log_file = tex_input_file.with_suffix(".log") + with open(tex_log_file, "r") as f: + tex_log = f.read() + # print(tex_log) + # raise e + + # if ret_code != 0: + # print("stdout:") + # 
print(stdout) + # print("stderr:") + # print(stderr) + # raise ValueError("cmd {} failed with code {}", cmd, ret_code) + + # todo: read the log file of pdflatex... + # todo: copy the resulting pdf file to the output + + tex_output_pdf = tex_input_file.with_suffix(".pdf") + assert tex_output_pdf.is_file() + + if crop: + cmd = ["pdfcrop", str(tex_output_pdf), str(tex_output_pdf)] + cmd_utils.run_cmd( + " ".join(cmd), + timeout_sec=60, + cwd=temp_dir, + ) + + assert tex_output_pdf.is_file() + tex_output_pdf.rename(output_path) + + +MINUTES = 60 + + +def convert_to_png( + input_path: PathLike, + output_path: PathLike, + max_size: typing.Optional[int] = 4096 * 2, + quality=100, + density=300, + timeout_sec=8 * MINUTES, +): + import gpucachesim.cmd as cmd_utils + + input_path = Path(input_path) + assert input_path.is_file() + output_path = Path(output_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + + cmd = [ + "convert", + ] + if isinstance(density, int): + cmd += [ + "-density", + str(density), + ] + cmd += [str(input_path)] + + if isinstance(quality, int): + cmd += [ + "-quality", + str(quality), + ] + if isinstance(max_size, int): + cmd += [ + "-resize", + "{}x{}".format(max_size, max_size), + ] + cmd += [str(output_path)] + cmd = " ".join(cmd) + _, stdout, stderr, _ = cmd_utils.run_cmd( + cmd, + timeout_sec=timeout_sec, + ) + + +@main.command() +@click.option("-i", "--input", "input_path", help="path to input file") +@click.option("-o", "--output", "output", help="path to output file") +@click.option("-s", "--size", "max_size", help="max size in any dimension") +@click.option("-q", "--quality", "quality", help="quality") +@click.option("-d", "--density", "density", help="density") +def to_png(input_path, output_path, max_size, quality, density): + convert_to_png( + input_path=input_path, + output_path=output_path, + max_size=max_size, + quality=quality, + density=density, + ) + # import gpucachesim.cmd as cmd_utils + + # cmd = [ + # "convert", + # 
"-density", + # str(density), + # str(input_path), + # "-quality", + # str(quality), + # "-resize", + # "{}x{}".format(max_size, max_size), + # str(output_path), + # ] + # cmd = " ".join(cmd) + # _, stdout, stderr, _ = cmd_utils.run_cmd( + # cmd, + # timeout_sec=1 * 60, + # ) + # print(stdout) + # print(stderr) + + +if __name__ == "__main__": + main() + # main(ctx={}) diff --git a/plot/.gitignore b/plot/.gitignore index 1f0e3925..b2f09a87 100644 --- a/plot/.gitignore +++ b/plot/.gitignore @@ -1,2 +1,4 @@ asm/ cache/ +tables/png/ +equations/png/ diff --git a/test-apps/test-apps-materialized.yml b/test-apps/test-apps-materialized.yml index a0be6a9c..fc80da46 100755 --- a/test-apps/test-apps-materialized.yml +++ b/test-apps/test-apps-materialized.yml @@ -2,7 +2,7 @@ ## ## AUTO GENERATED! DO NOT EDIT ## -## this configuration was materialized from /home/roman/dev/box/test-apps/test-apps.yml on 29/01/2024 19:14:01 +## this configuration was materialized from /home/roman/dev/box/test-apps/test-apps.yml on 11/02/2024 01:36:56 ## config: @@ -53,10 +53,10 @@ config: - 10 num_clusters: - 28 - - 56 + - 112 cores_per_cluster: - 1 - - 8 + - 4 memory_only: - false - true @@ -67,10 +67,18 @@ config: run_ahead: 10 - mode: deterministic run_ahead: 10 - - num_clusters: 56 - cores_per_cluster: 8 - - num_clusters: 56 - cores_per_cluster: 8 + - mode: deterministic + memory_only: true + - mode: nondeterministic + memory_only: true + - mode: serial + cores_per_cluster: 4 + memory_only: true + - mode: serial + num_clusters: 112 + memory_only: true + - num_clusters: 112 + cores_per_cluster: 4 exec_driven_simulate: repetitions: 1 timeout: null @@ -4784,12 +4792,12 @@ benchmarks: l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: 
/home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 length: 100 @@ -4797,7 +4805,7 @@ benchmarks: threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - '100' @@ -4811,100 +4819,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 values: dtype: 32 length: 100 mode: serial threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '100' - - '32' - input_idx: 3 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace - parallel: null - l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 - values: - dtype: 32 - length: 100 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - '100' - '32' - input_idx: 4 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace - parallel: null - l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 - values: - dtype: 32 - length: 100 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '100' - - '32' - input_idx: 5 + input_idx: 3 common: repetitions: 2 timeout: null @@ -4913,7 +4853,7 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace parallel: null @@ -4938,7 +4878,7 @@ benchmarks: args: - '100' - '32' - input_idx: 6 + input_idx: 4 common: repetitions: 2 timeout: null @@ -4954,12 +4894,12 @@ benchmarks: l2_prefill: 
null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 length: 100 @@ -4967,80 +4907,12 @@ benchmarks: threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '100' - - '32' - input_idx: 7 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace - parallel: null - l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - values: - dtype: 32 - length: 100 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - '100' - '32' - input_idx: 8 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace - parallel: null - l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - values: - dtype: 32 - length: 100 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '100' - - '32' - input_idx: 9 + input_idx: 5 common: repetitions: 2 timeout: null @@ -5049,66 +4921,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 values: dtype: 32 length: 100 mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - '100' - '32' - input_idx: 10 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace - parallel: null - l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 - values: - dtype: 32 - length: 100 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '100' - - '32' - input_idx: 11 + input_idx: 6 common: repetitions: 2 timeout: null @@ -5117,7 +4955,7 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace parallel: null @@ -5142,7 +4980,7 @@ benchmarks: args: - '100' - '32' - input_idx: 12 + input_idx: 7 common: repetitions: 2 timeout: null @@ -5158,46 +4996,12 @@ benchmarks: l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: 
vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - values: - dtype: 32 - length: 100 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '100' - - '32' - input_idx: 13 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace - parallel: null - l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 length: 100 @@ -5205,46 +5009,12 @@ benchmarks: threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - '100' - '32' - input_idx: 14 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace - parallel: null - l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - values: - dtype: 32 - length: 100 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '100' - - '32' - input_idx: 15 + input_idx: 8 common: repetitions: 2 timeout: null @@ -5253,66 +5023,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 values: dtype: 32 length: 100 mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - '100' - '32' - input_idx: 16 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace - parallel: null - l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 - values: - dtype: 32 - length: 100 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '100' - - '32' - input_idx: 17 + input_idx: 9 common: repetitions: 2 timeout: null @@ -5321,7 +5057,7 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace parallel: null @@ -5346,7 +5082,7 @@ benchmarks: args: - '100' - '32' - input_idx: 18 + input_idx: 10 common: repetitions: 2 timeout: null @@ -5362,12 +5098,12 @@ benchmarks: l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: 
vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 length: 100 @@ -5375,12 +5111,12 @@ benchmarks: threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - '100' - '32' - input_idx: 19 + input_idx: 11 common: repetitions: 2 timeout: null @@ -5389,32 +5125,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: 
vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 values: dtype: 32 length: 100 mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - '100' - '32' - input_idx: 20 + input_idx: 12 common: repetitions: 2 timeout: null @@ -5423,32 +5159,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: 
/home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: dtype: 32 length: 100 mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - '100' - '32' - input_idx: 21 + input_idx: 13 common: repetitions: 2 timeout: null @@ -5457,32 +5193,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: 
/home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: dtype: 32 length: 100 mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + run_ahead: 10 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - '100' - '32' - input_idx: 22 + input_idx: 14 common: repetitions: 2 timeout: null @@ -5491,32 +5227,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 values: dtype: 32 length: 100 mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - '100' - '32' - input_idx: 23 + input_idx: 15 common: repetitions: 2 timeout: null @@ -5525,32 +5261,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 length: 100 mode: nondeterministic - threads: 4 - run_ahead: 10 + threads: 8 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - '100' - '32' - input_idx: 24 + input_idx: 16 common: repetitions: 2 timeout: null @@ -5559,32 +5295,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + 
results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 length: 100 mode: nondeterministic - threads: 4 - run_ahead: 10 + threads: 8 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - '100' - '32' - input_idx: 25 + input_idx: 17 common: repetitions: 2 timeout: null @@ -5593,32 +5329,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 values: dtype: 32 length: 100 mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 + threads: 8 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - '100' - '32' - input_idx: 26 + input_idx: 18 common: repetitions: 2 timeout: null @@ -5627,32 +5363,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: dtype: 32 length: 100 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - '100' - '32' - input_idx: 27 + input_idx: 19 common: repetitions: 2 timeout: null @@ -5661,32 +5397,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: dtype: 32 length: 100 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - '100' - '32' - input_idx: 28 + input_idx: 20 common: repetitions: 2 timeout: null @@ -5695,32 +5431,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 values: dtype: 32 length: 100 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - '100' - '32' - input_idx: 29 + input_idx: 21 common: repetitions: 2 timeout: null @@ -5729,32 +5465,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 
values: dtype: 32 - length: 100 - mode: nondeterministic - threads: 8 + length: 1000 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '100' + - '1000' - '32' - input_idx: 30 + input_idx: 22 common: repetitions: 2 timeout: null @@ -5763,32 +5499,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 100 - mode: nondeterministic - threads: 8 + length: 1000 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '100' + - '1000' - '32' - input_idx: 31 + input_idx: 23 common: repetitions: 2 timeout: null @@ -5797,32 +5533,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 100 - mode: nondeterministic - threads: 8 + length: 1000 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '100' + - '1000' - '32' - input_idx: 32 + input_idx: 24 common: repetitions: 2 timeout: null @@ -5831,32 +5567,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: 
/home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 values: dtype: 32 - length: 100 - mode: nondeterministic - threads: 8 + length: 1000 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '100' + - '1000' - '32' - input_idx: 33 + input_idx: 25 common: repetitions: 2 timeout: null @@ -5865,32 +5601,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: 
/home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 100 - mode: nondeterministic - threads: 8 + length: 1000 + mode: deterministic + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '100' + - '1000' - '32' - input_idx: 34 + input_idx: 26 common: repetitions: 2 timeout: null @@ -5899,32 +5635,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + uid: 
vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 100 - mode: nondeterministic - threads: 8 + length: 1000 + mode: deterministic + threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '100' + - '1000' - '32' - input_idx: 35 + input_idx: 27 common: repetitions: 2 timeout: null @@ -5933,32 +5669,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: 
vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 values: dtype: 32 - length: 100 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 + length: 1000 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '100' + - '1000' - '32' - input_idx: 36 + input_idx: 28 common: repetitions: 2 timeout: null @@ -5967,32 +5703,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 - length: 100 - mode: nondeterministic + length: 1000 + mode: deterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100' + - '1000' - '32' - input_idx: 37 + input_idx: 29 common: repetitions: 2 timeout: null @@ -6001,32 +5737,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 - length: 100 - mode: nondeterministic + length: 1000 + mode: deterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '100' + - '1000' - '32' - input_idx: 38 + input_idx: 30 common: repetitions: 2 timeout: null @@ -6035,32 +5771,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 values: dtype: 32 - length: 100 - mode: nondeterministic + length: 1000 + mode: deterministic threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '100' + - '1000' - '32' - input_idx: 39 + input_idx: 31 common: repetitions: 2 timeout: null @@ -6069,32 +5805,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 100 + length: 1000 mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 + threads: 4 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '100' + - '1000' - '32' - input_idx: 40 + input_idx: 32 common: repetitions: 2 timeout: null @@ -6103,32 +5839,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 100 + length: 1000 mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + threads: 4 + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '100' + - '1000' - '32' - input_idx: 41 + input_idx: 33 common: repetitions: 2 timeout: null @@ 
-6137,32 +5873,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 values: dtype: 32 length: 1000 - mode: serial + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - '1000' - '32' - input_idx: 42 + input_idx: 34 common: repetitions: 2 
timeout: null @@ -6171,32 +5907,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: dtype: 32 length: 1000 - mode: serial + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - '1000' - '32' - input_idx: 43 + input_idx: 35 common: repetitions: 2 timeout: null @@ -6205,32 +5941,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: dtype: 32 length: 1000 - mode: serial + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - '1000' - '32' - input_idx: 44 + input_idx: 36 common: repetitions: 2 timeout: null @@ -6239,32 +5975,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + 
stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 values: dtype: 32 length: 1000 - mode: serial + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - '1000' - '32' - input_idx: 45 + input_idx: 37 common: repetitions: 2 timeout: null @@ -6273,32 +6009,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 length: 1000 - mode: serial - threads: 4 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - '1000' - '32' - input_idx: 46 + input_idx: 38 common: repetitions: 2 timeout: null @@ -6307,32 +6043,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate 
traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 length: 1000 - mode: serial - threads: 4 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - '1000' - '32' - input_idx: 47 + input_idx: 39 common: repetitions: 2 timeout: null @@ -6341,32 +6077,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 values: dtype: 32 length: 1000 - mode: deterministic - threads: 4 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - '1000' - '32' - input_idx: 48 + input_idx: 40 common: repetitions: 2 timeout: null @@ -6375,32 +6111,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd 
benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: dtype: 32 length: 1000 - mode: deterministic - threads: 4 - run_ahead: 5 + mode: nondeterministic + threads: 8 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - '1000' - '32' - input_idx: 49 + input_idx: 41 common: repetitions: 2 timeout: null @@ -6409,32 +6145,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: 
vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: dtype: 32 length: 1000 - mode: deterministic - threads: 4 - run_ahead: 5 + mode: nondeterministic + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - '1000' - '32' - input_idx: 50 + input_idx: 42 common: repetitions: 2 timeout: null @@ -6443,32 +6179,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + 
uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 values: dtype: 32 length: 1000 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - '1000' - '32' - input_idx: 51 + input_idx: 43 common: repetitions: 2 timeout: null @@ -6477,32 +6213,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8/simulate traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + uid: 
vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 1000 - mode: deterministic + length: 10000 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 52 + input_idx: 44 common: repetitions: 2 timeout: null @@ -6511,32 +6247,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: 
vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 1000 - mode: deterministic + length: 10000 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '1000' + - '10000' - '32' - input_idx: 53 + input_idx: 45 common: repetitions: 2 timeout: null @@ -6545,32 +6281,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd 
benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 1000 - mode: deterministic - threads: 8 + length: 10000 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 54 + input_idx: 46 common: repetitions: 2 timeout: null @@ -6579,32 +6315,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace 
parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 values: dtype: 32 - length: 1000 - mode: deterministic - threads: 8 + length: 10000 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 55 + input_idx: 47 common: repetitions: 2 timeout: null @@ -6613,32 +6349,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 1000 + length: 10000 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 56 + input_idx: 48 common: repetitions: 2 timeout: null @@ -6647,32 +6383,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 1000 + length: 10000 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 57 + input_idx: 49 common: repetitions: 2 timeout: null @@ -6681,32 +6417,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 values: dtype: 32 - length: 1000 + length: 10000 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 58 + input_idx: 50 common: repetitions: 2 timeout: null @@ -6715,32 +6451,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 - length: 1000 + length: 10000 mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 59 + input_idx: 51 common: repetitions: 2 timeout: null @@ -6749,32 +6485,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 - length: 1000 - mode: nondeterministic - threads: 4 + length: 10000 + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 60 + input_idx: 52 common: repetitions: 2 timeout: null @@ -6783,32 +6519,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 values: dtype: 32 - length: 1000 - mode: nondeterministic - threads: 4 + length: 10000 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 61 + input_idx: 53 common: repetitions: 2 timeout: null @@ -6817,32 +6553,32 @@ 
benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 1000 + length: 10000 mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 62 + input_idx: 54 
common: repetitions: 2 timeout: null @@ -6851,32 +6587,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 1000 + length: 10000 mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + 
memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 63 + input_idx: 55 common: repetitions: 2 timeout: null @@ -6885,32 +6621,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 values: dtype: 32 - length: 1000 + length: 10000 mode: nondeterministic threads: 4 run_ahead: 5 - 
num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 64 + input_idx: 56 common: repetitions: 2 timeout: null @@ -6919,32 +6655,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: dtype: 32 - length: 1000 + 
length: 10000 mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 65 + input_idx: 57 common: repetitions: 2 timeout: null @@ -6953,32 +6689,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: dtype: 32 - length: 1000 + length: 10000 mode: nondeterministic threads: 4 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 66 + input_idx: 58 common: repetitions: 2 timeout: null @@ -6987,32 +6723,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 values: dtype: 32 - length: 1000 + length: 10000 mode: nondeterministic threads: 4 run_ahead: 10 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 67 + input_idx: 59 common: repetitions: 2 timeout: null @@ -7021,32 +6757,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled 
executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 - length: 1000 + length: 10000 mode: nondeterministic - threads: 4 - run_ahead: 10 + threads: 8 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 68 + input_idx: 60 common: repetitions: 2 timeout: null @@ -7055,32 +6791,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: 
/home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 - length: 1000 + length: 10000 mode: nondeterministic - threads: 4 - run_ahead: 10 + threads: 8 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 69 + input_idx: 61 common: repetitions: 2 timeout: null @@ -7089,32 +6825,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + uid: 
vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 values: dtype: 32 - length: 1000 + length: 10000 mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 + threads: 8 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 70 + input_idx: 62 common: repetitions: 2 timeout: null @@ -7123,32 +6859,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: 
vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: dtype: 32 - length: 1000 + length: 10000 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 71 + input_idx: 63 common: repetitions: 2 timeout: null @@ -7157,32 +6893,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: dtype: 32 - length: 1000 + length: 10000 mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 72 + input_idx: 64 common: repetitions: 2 timeout: null @@ -7191,32 +6927,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 values: dtype: 32 - length: 1000 + length: 10000 mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 28 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '1000' + - '10000' - '32' - input_idx: 73 + input_idx: 65 common: repetitions: 2 timeout: null @@ -7225,32 +6961,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 1000 - mode: nondeterministic - threads: 8 + length: 20000 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '1000' + - '20000' - '32' - input_idx: 74 + input_idx: 66 common: repetitions: 2 timeout: null @@ -7259,32 +6995,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 1000 - mode: nondeterministic - threads: 8 + length: 20000 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: true args: - - '1000' + - '20000' - '32' - input_idx: 75 + input_idx: 67 common: repetitions: 2 timeout: null @@ -7293,32 +7029,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 1000 - mode: nondeterministic - threads: 8 + length: 20000 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '1000' + - '20000' - '32' - input_idx: 76 + input_idx: 68 common: repetitions: 2 timeout: null @@ -7327,32 +7063,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 values: dtype: 32 - length: 1000 - mode: nondeterministic - threads: 8 + length: 20000 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '1000' + - '20000' - '32' - input_idx: 77 + input_idx: 69 common: repetitions: 2 timeout: null @@ -7361,32 +7097,32 @@ benchmarks: results_dir: 
/home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 1000 - mode: nondeterministic - threads: 8 - run_ahead: 10 + length: 20000 + mode: deterministic + threads: 4 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '1000' + - '20000' - '32' - input_idx: 78 + input_idx: 70 common: 
repetitions: 2 timeout: null @@ -7395,32 +7131,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 1000 - mode: nondeterministic - threads: 8 - run_ahead: 10 + length: 20000 + mode: deterministic + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - 
memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '1000' + - '20000' - '32' - input_idx: 79 + input_idx: 71 common: repetitions: 2 timeout: null @@ -7429,32 +7165,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 values: dtype: 32 - length: 1000 - mode: nondeterministic - 
threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 + length: 20000 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '1000' + - '20000' - '32' - input_idx: 80 + input_idx: 72 common: repetitions: 2 timeout: null @@ -7463,32 +7199,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 - length: 1000 - mode: nondeterministic + length: 20000 + mode: deterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '1000' + - '20000' - '32' - input_idx: 81 + input_idx: 73 common: repetitions: 2 timeout: null @@ -7497,32 +7233,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 - length: 1000 - mode: nondeterministic + length: 20000 + mode: deterministic threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '1000' + - '20000' - '32' - input_idx: 82 + input_idx: 74 common: repetitions: 2 timeout: null @@ -7531,32 +7267,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd 
executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 values: dtype: 32 - length: 1000 - mode: nondeterministic + length: 20000 + mode: deterministic threads: 8 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '1000' + - '20000' - '32' - input_idx: 83 + input_idx: 75 common: repetitions: 2 timeout: null @@ -7565,32 +7301,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: 
vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 10000 - mode: serial + length: 20000 + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '10000' + - '20000' - '32' - input_idx: 84 + input_idx: 76 common: repetitions: 2 timeout: null @@ -7599,32 +7335,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: 
vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 10000 - mode: serial + length: 20000 + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '10000' + - '20000' - '32' - input_idx: 85 + input_idx: 77 common: repetitions: 2 timeout: null @@ -7633,32 +7369,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace 
parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 values: dtype: 32 - length: 10000 - mode: serial + length: 20000 + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '10000' + - '20000' - '32' - input_idx: 86 + input_idx: 78 common: repetitions: 2 timeout: null @@ -7667,32 +7403,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: dtype: 32 - length: 10000 - mode: serial + length: 20000 + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '10000' + - '20000' - '32' - input_idx: 87 + input_idx: 79 common: repetitions: 2 timeout: null @@ -7701,32 +7437,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: dtype: 32 - length: 10000 - mode: serial + length: 20000 + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + run_ahead: 10 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '10000' + - '20000' - '32' - input_idx: 88 + input_idx: 80 common: repetitions: 2 timeout: null @@ -7735,32 +7471,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 values: dtype: 32 - length: 10000 - mode: serial + length: 20000 + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '10000' + - '20000' - '32' - input_idx: 89 + input_idx: 81 common: repetitions: 2 timeout: null @@ -7769,32 +7505,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 - length: 10000 - mode: deterministic - threads: 4 + length: 20000 + mode: nondeterministic + threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '10000' + - '20000' - '32' - input_idx: 90 + input_idx: 82 common: repetitions: 2 timeout: null @@ -7803,32 +7539,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 - length: 10000 - mode: deterministic - threads: 4 + length: 20000 + mode: nondeterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '10000' + - '20000' - '32' - input_idx: 91 + input_idx: 83 common: repetitions: 2 timeout: null @@ -7837,32 +7573,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 values: dtype: 32 - length: 10000 - mode: deterministic - threads: 4 + length: 20000 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '10000' + - '20000' - '32' - input_idx: 92 + input_idx: 84 common: repetitions: 2 timeout: null @@ -7871,32 
+7607,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: dtype: 32 - length: 10000 - mode: deterministic - threads: 4 - run_ahead: 5 + length: 20000 + mode: nondeterministic + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + 
memory_only: false args: - - '10000' + - '20000' - '32' - input_idx: 93 + input_idx: 85 common: repetitions: 2 timeout: null @@ -7905,32 +7641,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: dtype: 32 - length: 10000 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 
56 - cores_per_cluster: 1 + length: 20000 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '10000' + - '20000' - '32' - input_idx: 94 + input_idx: 86 common: repetitions: 2 timeout: null @@ -7939,32 +7675,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 values: dtype: 32 - length: 10000 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 + length: 20000 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '10000' + - '20000' - '32' - input_idx: 95 + input_idx: 87 common: repetitions: 2 timeout: null @@ -7973,32 +7709,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 10000 - mode: deterministic - threads: 8 + length: 100000 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '10000' + - '100000' - '32' - input_idx: 96 + input_idx: 88 common: repetitions: 2 timeout: null @@ -8007,32 +7743,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: 
/home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 10000 - mode: deterministic - threads: 8 + length: 100000 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '10000' + - '100000' - '32' - input_idx: 97 + input_idx: 89 common: repetitions: 2 timeout: null @@ -8041,32 +7777,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: 
vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 10000 - mode: deterministic - threads: 8 + length: 100000 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '10000' + - '100000' - '32' - input_idx: 98 + input_idx: 90 common: repetitions: 2 timeout: null @@ -8075,32 +7811,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 path: 
/home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 values: dtype: 32 - length: 10000 - mode: deterministic - threads: 8 + length: 100000 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '10000' + - '100000' - '32' - input_idx: 99 + input_idx: 91 common: repetitions: 2 timeout: null @@ -8109,32 +7845,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + uid: 
vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 10000 + length: 100000 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '10000' + - '100000' - '32' - input_idx: 100 + input_idx: 92 common: repetitions: 2 timeout: null @@ -8143,32 +7879,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: 
vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 10000 + length: 100000 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '10000' + - '100000' - '32' - input_idx: 101 + input_idx: 93 common: repetitions: 2 timeout: null @@ -8177,32 +7913,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 values: dtype: 32 - length: 10000 - mode: nondeterministic + length: 100000 + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '10000' + - '100000' - '32' - input_idx: 102 + input_idx: 94 common: repetitions: 2 timeout: null @@ -8211,32 +7947,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 - length: 10000 - mode: nondeterministic - threads: 4 + length: 100000 + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '10000' + - '100000' - '32' - input_idx: 103 + input_idx: 95 common: repetitions: 2 timeout: null @@ -8245,32 +7981,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 - length: 10000 - mode: nondeterministic - threads: 4 + length: 100000 + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '10000' + - '100000' - '32' - input_idx: 104 + input_idx: 96 common: repetitions: 2 timeout: null @@ -8279,32 +8015,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 values: dtype: 32 - length: 10000 - mode: nondeterministic - threads: 4 + length: 100000 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '10000' + - '100000' - '32' - input_idx: 105 + input_idx: 97 common: repetitions: 2 timeout: null @@ -8313,32 +8049,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 10000 + length: 100000 mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '10000' + - '100000' - '32' - input_idx: 106 + input_idx: 98 common: repetitions: 2 timeout: null @@ -8347,32 +8083,32 @@ benchmarks: results_dir: 
/home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 10000 + length: 100000 mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '10000' + - '100000' - '32' - 
input_idx: 107 + input_idx: 99 common: repetitions: 2 timeout: null @@ -8381,32 +8117,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 values: dtype: 32 - length: 10000 + length: 100000 mode: nondeterministic threads: 4 - run_ahead: 10 - num_clusters: 28 + run_ahead: 5 + 
num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '10000' + - '100000' - '32' - input_idx: 108 + input_idx: 100 common: repetitions: 2 timeout: null @@ -8415,32 +8151,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: dtype: 32 - length: 10000 + 
length: 100000 mode: nondeterministic threads: 4 run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '10000' + - '100000' - '32' - input_idx: 109 + input_idx: 101 common: repetitions: 2 timeout: null @@ -8449,32 +8185,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: dtype: 32 - length: 10000 + length: 100000 mode: nondeterministic threads: 4 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '10000' + - '100000' - '32' - input_idx: 110 + input_idx: 102 common: repetitions: 2 timeout: null @@ -8483,32 +8219,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 values: dtype: 32 - length: 10000 + length: 100000 mode: nondeterministic threads: 4 run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '10000' + - '100000' - '32' - input_idx: 111 + input_idx: 103 common: repetitions: 2 timeout: null @@ -8517,32 +8253,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd 
executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 - length: 10000 + length: 100000 mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 + threads: 8 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '10000' + - '100000' - '32' - input_idx: 112 + input_idx: 104 common: repetitions: 2 timeout: null @@ -8551,32 +8287,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + uid: 
vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 - length: 10000 + length: 100000 mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + threads: 8 + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '10000' + - '100000' - '32' - input_idx: 113 + input_idx: 105 common: repetitions: 2 timeout: null @@ -8585,32 +8321,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd 
benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 values: dtype: 32 - length: 10000 + length: 100000 mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '10000' + - '100000' - '32' - input_idx: 114 + input_idx: 106 common: repetitions: 2 timeout: null @@ -8619,32 +8355,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: dtype: 32 - length: 10000 + length: 100000 mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '10000' + - '100000' - '32' - input_idx: 115 + input_idx: 107 common: repetitions: 2 timeout: null @@ -8653,32 +8389,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: dtype: 32 - length: 10000 + length: 100000 mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '10000' + - '100000' - '32' - input_idx: 116 + input_idx: 108 common: repetitions: 2 timeout: null @@ -8687,32 +8423,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 values: dtype: 32 - length: 10000 + length: 100000 mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '10000' + - '100000' - '32' - input_idx: 117 + input_idx: 109 common: repetitions: 2 timeout: null @@ -8721,32 +8457,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 10000 - mode: nondeterministic - threads: 8 + length: 500000 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '10000' + - '500000' - '32' - input_idx: 118 + input_idx: 110 common: repetitions: 2 timeout: null @@ -8755,32 +8491,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 10000 - mode: nondeterministic - threads: 8 + length: 500000 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '10000' + - '500000' - '32' - input_idx: 119 + input_idx: 111 common: repetitions: 2 timeout: null @@ -8789,32 +8525,32 @@ benchmarks: results_dir: 
/home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 10000 - mode: nondeterministic - threads: 8 - run_ahead: 10 + length: 500000 + mode: serial + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '10000' + - '500000' - '32' - input_idx: 120 + 
input_idx: 112 common: repetitions: 2 timeout: null @@ -8823,32 +8559,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 values: dtype: 32 - length: 10000 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 + length: 500000 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 112 
cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '10000' + - '500000' - '32' - input_idx: 121 + input_idx: 113 common: repetitions: 2 timeout: null @@ -8857,32 +8593,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 10000 - mode: 
nondeterministic - threads: 8 - run_ahead: 10 + length: 500000 + mode: deterministic + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '10000' + - '500000' - '32' - input_idx: 122 + input_idx: 114 common: repetitions: 2 timeout: null @@ -8891,32 +8627,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 10000 - mode: nondeterministic - threads: 8 - run_ahead: 10 + length: 500000 + mode: deterministic + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '10000' + - '500000' - '32' - input_idx: 123 + input_idx: 115 common: repetitions: 2 timeout: null @@ -8925,32 +8661,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 values: dtype: 32 - length: 10000 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 + length: 500000 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '10000' + - '500000' - '32' - input_idx: 124 + input_idx: 116 common: repetitions: 2 timeout: null @@ -8959,32 +8695,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd 
executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 - length: 10000 - mode: nondeterministic + length: 500000 + mode: deterministic threads: 8 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '10000' + - '500000' - '32' - input_idx: 125 + input_idx: 117 common: repetitions: 2 timeout: null @@ -8993,32 +8729,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: 
vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 - length: 20000 - mode: serial - threads: 4 + length: 500000 + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '20000' + - '500000' - '32' - input_idx: 126 + input_idx: 118 common: repetitions: 2 timeout: null @@ -9027,32 +8763,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: 
vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 values: dtype: 32 - length: 20000 - mode: serial - threads: 4 + length: 500000 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '20000' + - '500000' - '32' - input_idx: 127 + input_idx: 119 common: repetitions: 2 timeout: null @@ -9061,32 +8797,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 20000 - mode: serial + length: 500000 + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '20000' + - '500000' - '32' - input_idx: 128 + input_idx: 120 common: repetitions: 2 timeout: null @@ -9095,32 +8831,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 32 - length: 20000 - mode: serial + length: 500000 + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '20000' + - '500000' - '32' - input_idx: 129 + input_idx: 121 common: repetitions: 2 timeout: null @@ -9129,32 +8865,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 values: dtype: 32 - length: 20000 - mode: serial + length: 500000 + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '20000' + - '500000' - '32' - input_idx: 130 + input_idx: 122 common: repetitions: 2 timeout: null @@ -9163,32 +8899,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: dtype: 32 - length: 20000 - mode: serial + length: 500000 + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '20000' + - '500000' - '32' - input_idx: 131 + input_idx: 123 common: repetitions: 2 timeout: null @@ -9197,32 +8933,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: dtype: 32 - length: 20000 - mode: deterministic + length: 500000 + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '20000' + - '500000' - '32' - input_idx: 132 + input_idx: 124 common: repetitions: 2 timeout: null @@ -9231,32 +8967,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 values: dtype: 32 - length: 20000 - mode: deterministic + length: 500000 + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 28 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '20000' + - '500000' - '32' - input_idx: 133 + input_idx: 125 common: repetitions: 2 timeout: null 
@@ -9265,32 +9001,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 - length: 20000 - mode: deterministic - threads: 4 + length: 500000 + mode: nondeterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false 
args: - - '20000' + - '500000' - '32' - input_idx: 134 + input_idx: 126 common: repetitions: 2 timeout: null @@ -9299,32 +9035,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 32 - length: 20000 - mode: deterministic - threads: 4 + length: 500000 + mode: nondeterministic + 
threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '20000' + - '500000' - '32' - input_idx: 135 + input_idx: 127 common: repetitions: 2 timeout: null @@ -9333,32 +9069,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 values: dtype: 32 - length: 20000 - mode: deterministic - threads: 4 + length: 500000 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '20000' + - '500000' - '32' - input_idx: 136 + input_idx: 128 common: repetitions: 2 timeout: null @@ -9367,32 +9103,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: dtype: 32 - length: 20000 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 + length: 500000 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '20000' + - '500000' - '32' - input_idx: 137 + input_idx: 129 common: repetitions: 2 timeout: null @@ -9401,32 +9137,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd 
rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: dtype: 32 - length: 20000 - mode: deterministic + length: 500000 + mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '20000' + - '500000' - '32' - input_idx: 138 + input_idx: 130 common: repetitions: 2 timeout: null @@ -9435,32 +9171,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: 
vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 values: dtype: 32 - length: 20000 - mode: deterministic + length: 500000 + mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 28 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '20000' + - '500000' - '32' - input_idx: 139 + input_idx: 131 common: repetitions: 2 timeout: null @@ -9469,32 +9205,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: 
vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 20000 - mode: deterministic - threads: 8 + dtype: 64 + length: 100 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '20000' - - '32' - input_idx: 140 + - '100' + - '64' + input_idx: 132 common: repetitions: 2 timeout: null @@ -9503,32 +9239,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace 
parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 20000 - mode: deterministic - threads: 8 + dtype: 64 + length: 100 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: true args: - - '20000' - - '32' - input_idx: 141 + - '100' + - '64' + input_idx: 133 common: repetitions: 2 timeout: null @@ -9537,32 +9273,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 20000 - mode: deterministic - threads: 8 + dtype: 64 + length: 100 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '20000' - - '32' - input_idx: 142 + - '100' + - '64' + input_idx: 134 common: repetitions: 2 timeout: null @@ -9571,32 +9307,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 values: - dtype: 32 - length: 20000 - mode: deterministic - threads: 8 + dtype: 64 + length: 100 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '20000' - - '32' - input_idx: 143 + - '100' + - '64' + input_idx: 135 common: repetitions: 2 timeout: null @@ -9605,32 +9341,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 20000 - mode: nondeterministic + dtype: 64 + length: 100 + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '20000' - - '32' - input_idx: 144 + - '100' + - '64' + input_idx: 136 common: repetitions: 2 timeout: null @@ -9639,32 +9375,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 20000 - mode: nondeterministic + dtype: 64 + length: 100 + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '20000' - - '32' - input_idx: 145 + - '100' + - '64' + input_idx: 137 common: repetitions: 2 timeout: null @@ -9673,32 +9409,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate 
- traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 values: - dtype: 32 - length: 20000 - mode: nondeterministic + dtype: 64 + length: 100 + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '20000' - - '32' - input_idx: 146 + - '100' + - '64' + input_idx: 138 common: repetitions: 2 timeout: null @@ -9707,32 +9443,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: - dtype: 32 - length: 20000 - mode: nondeterministic - threads: 4 + dtype: 64 + length: 100 + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '20000' - - '32' - input_idx: 147 + - '100' + - '64' + input_idx: 139 common: repetitions: 2 timeout: null 
@@ -9741,32 +9477,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: - dtype: 32 - length: 20000 - mode: nondeterministic - threads: 4 + dtype: 64 + length: 100 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 
memory_only: false args: - - '20000' - - '32' - input_idx: 148 + - '100' + - '64' + input_idx: 140 common: repetitions: 2 timeout: null @@ -9775,32 +9511,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 values: - dtype: 32 - length: 20000 - mode: nondeterministic - threads: 4 + dtype: 64 + length: 100 + 
mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '20000' - - '32' - input_idx: 149 + - '100' + - '64' + input_idx: 141 common: repetitions: 2 timeout: null @@ -9809,32 +9545,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 20000 + dtype: 64 + length: 100 mode: nondeterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '20000' - - '32' - input_idx: 150 + - '100' + - '64' + input_idx: 142 common: repetitions: 2 timeout: null @@ -9843,32 +9579,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 20000 + dtype: 64 + length: 100 mode: nondeterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '20000' - - '32' - input_idx: 151 + - '100' + - '64' + input_idx: 143 common: repetitions: 2 timeout: null @@ -9877,32 +9613,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd 
executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 values: - dtype: 32 - length: 20000 + dtype: 64 + length: 100 mode: nondeterministic threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '20000' - - '32' - input_idx: 152 + - '100' + - '64' + input_idx: 144 common: repetitions: 2 timeout: null @@ -9911,32 +9647,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: 
vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: - dtype: 32 - length: 20000 + dtype: 64 + length: 100 mode: nondeterministic threads: 4 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '20000' - - '32' - input_idx: 153 + - '100' + - '64' + input_idx: 145 common: repetitions: 2 timeout: null @@ -9945,32 +9681,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: 
vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: - dtype: 32 - length: 20000 + dtype: 64 + length: 100 mode: nondeterministic threads: 4 run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '20000' - - '32' - input_idx: 154 + - '100' + - '64' + input_idx: 146 common: repetitions: 2 timeout: null @@ -9979,32 +9715,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 values: - dtype: 32 - length: 20000 + dtype: 64 + length: 100 mode: nondeterministic threads: 4 run_ahead: 10 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '20000' - - '32' - input_idx: 155 + - '100' + - '64' + input_idx: 147 common: repetitions: 2 timeout: null @@ -10013,22 +9749,22 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4/simulate + 
traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: - dtype: 32 - length: 20000 + dtype: 64 + length: 100 mode: nondeterministic threads: 8 run_ahead: 5 @@ -10036,9 +9772,9 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '20000' - - '32' - input_idx: 156 + - '100' + - '64' + input_idx: 148 common: repetitions: 2 timeout: null @@ -10047,32 +9783,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: - dtype: 32 - length: 20000 + dtype: 64 + length: 100 mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '20000' - - '32' - input_idx: 157 + - '100' + - '64' + input_idx: 149 common: repetitions: 2 timeout: null @@ -10081,32 +9817,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 values: - dtype: 32 - length: 20000 + dtype: 64 + length: 100 mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '20000' - - '32' - input_idx: 158 + - '100' + - '64' + input_idx: 150 common: repetitions: 2 timeout: null @@ -10115,32 +9851,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: - dtype: 32 - length: 20000 + dtype: 64 + length: 100 mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '20000' - - '32' - input_idx: 159 + - '100' + - '64' + input_idx: 151 common: repetitions: 2 timeout: null @@ 
-10149,32 +9885,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: - dtype: 32 - length: 20000 + dtype: 64 + length: 100 mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + run_ahead: 10 + num_clusters: 28 + cores_per_cluster: 4 memory_only: 
false args: - - '20000' - - '32' - input_idx: 160 + - '100' + - '64' + input_idx: 152 common: repetitions: 2 timeout: null @@ -10183,32 +9919,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 values: - dtype: 32 - length: 20000 + dtype: 64 + length: 100 mode: nondeterministic threads: 8 - 
run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '20000' - - '32' - input_idx: 161 + - '100' + - '64' + input_idx: 153 common: repetitions: 2 timeout: null @@ -10217,32 +9953,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 20000 - mode: nondeterministic - threads: 8 - run_ahead: 10 + dtype: 64 + length: 1000 + mode: serial + threads: 4 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '20000' - - '32' - input_idx: 162 + - '1000' + - '64' + input_idx: 154 common: repetitions: 2 timeout: null @@ -10251,32 +9987,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 20000 - mode: nondeterministic - threads: 8 - run_ahead: 10 + dtype: 64 + length: 1000 + mode: serial + threads: 4 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '20000' - - '32' - input_idx: 163 + - '1000' + - '64' + input_idx: 155 common: repetitions: 2 timeout: null @@ -10285,32 +10021,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled 
executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 20000 - mode: nondeterministic - threads: 8 - run_ahead: 10 + dtype: 64 + length: 1000 + mode: serial + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '20000' - - '32' - input_idx: 164 + - '1000' + - '64' + input_idx: 156 common: repetitions: 2 timeout: null @@ -10319,33 +10055,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: 
vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 values: - dtype: 32 - length: 20000 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '20000' - - '32' - input_idx: 165 - common: + dtype: 64 + length: 1000 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false + args: + - '1000' + - '64' + input_idx: 157 + common: repetitions: 2 timeout: null concurrency: 1 @@ -10353,32 +10089,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null 
l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 20000 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 + dtype: 64 + length: 1000 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '20000' - - '32' - input_idx: 166 + - '1000' + - '64' + input_idx: 158 common: repetitions: 2 timeout: null @@ -10387,32 +10123,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 20000 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + dtype: 64 + length: 1000 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '20000' - - '32' - input_idx: 167 + - '1000' + - '64' + input_idx: 159 common: repetitions: 2 timeout: null @@ -10421,32 +10157,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-20000/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 values: - dtype: 32 - length: 100000 - mode: serial + dtype: 64 + length: 1000 + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '100000' - - '32' - input_idx: 168 + - '1000' + - '64' + input_idx: 160 common: repetitions: 2 timeout: null @@ -10455,32 +10191,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: - dtype: 32 - length: 100000 - mode: serial - threads: 4 + dtype: 64 + length: 1000 + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100000' - - '32' - input_idx: 169 + - '1000' + - '64' + input_idx: 161 common: repetitions: 2 timeout: null @@ -10489,32 +10225,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: - dtype: 32 - length: 100000 - mode: serial - threads: 4 + dtype: 64 + length: 1000 + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '100000' - - '32' - input_idx: 170 + - '1000' + - '64' + input_idx: 162 common: repetitions: 2 timeout: null @@ -10523,32 +10259,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 values: - dtype: 32 - length: 100000 - mode: serial - threads: 4 + dtype: 64 + length: 1000 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '100000' - - '32' - input_idx: 171 + - '1000' + - '64' + input_idx: 163 common: repetitions: 2 timeout: null @@ 
-10557,32 +10293,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 100000 - mode: serial + dtype: 64 + length: 1000 + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '100000' - - '32' - input_idx: 172 + 
- '1000' + - '64' + input_idx: 164 common: repetitions: 2 timeout: null @@ -10591,32 +10327,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 100000 - mode: serial + dtype: 64 + length: 1000 + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - 
memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '100000' - - '32' - input_idx: 173 + - '1000' + - '64' + input_idx: 165 common: repetitions: 2 timeout: null @@ -10625,32 +10361,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 values: - dtype: 32 - length: 
100000 - mode: deterministic + dtype: 64 + length: 1000 + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '100000' - - '32' - input_idx: 174 + - '1000' + - '64' + input_idx: 166 common: repetitions: 2 timeout: null @@ -10659,32 +10395,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: - dtype: 32 - length: 100000 - mode: deterministic + dtype: 64 + length: 1000 + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100000' - - '32' - input_idx: 175 + - '1000' + - '64' + input_idx: 167 common: repetitions: 2 timeout: null @@ -10693,32 +10429,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: - dtype: 32 - length: 100000 - mode: deterministic + dtype: 64 + length: 1000 + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '100000' - - '32' - input_idx: 176 + - '1000' + - '64' + input_idx: 168 common: repetitions: 2 timeout: null @@ -10727,32 +10463,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: 
./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 values: - dtype: 32 - length: 100000 - mode: deterministic + dtype: 64 + length: 1000 + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '100000' - - '32' - input_idx: 177 + - '1000' + - '64' + input_idx: 169 common: repetitions: 2 timeout: null @@ -10761,32 +10497,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + uid: 
vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: - dtype: 32 - length: 100000 - mode: deterministic - threads: 4 + dtype: 64 + length: 1000 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '100000' - - '32' - input_idx: 178 + - '1000' + - '64' + input_idx: 170 common: repetitions: 2 timeout: null @@ -10795,32 +10531,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: 
vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: - dtype: 32 - length: 100000 - mode: deterministic - threads: 4 + dtype: 64 + length: 1000 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '100000' - - '32' - input_idx: 179 + - '1000' + - '64' + input_idx: 171 common: repetitions: 2 timeout: null @@ -10829,32 +10565,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + 
accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 values: - dtype: 32 - length: 100000 - mode: deterministic + dtype: 64 + length: 1000 + mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '100000' - - '32' - input_idx: 180 + - '1000' + - '64' + input_idx: 172 common: repetitions: 2 timeout: null @@ -10863,32 +10599,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: - dtype: 32 - length: 100000 - mode: deterministic + dtype: 64 + length: 1000 + mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100000' - - '32' - input_idx: 181 + - '1000' + - '64' + input_idx: 173 common: repetitions: 2 timeout: null @@ -10897,32 +10633,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: - dtype: 32 - length: 100000 - mode: deterministic + dtype: 64 + length: 1000 + mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '100000' - - '32' - input_idx: 182 + - '1000' + - '64' + input_idx: 174 common: repetitions: 2 timeout: null @@ -10931,32 +10667,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 values: - dtype: 32 - length: 100000 - mode: deterministic + dtype: 64 + length: 1000 + mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '100000' - - '32' - input_idx: 183 + - '1000' + - '64' + input_idx: 
175 common: repetitions: 2 timeout: null @@ -10965,32 +10701,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 100000 - mode: deterministic - threads: 8 + dtype: 64 + length: 10000 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 
memory_only: false args: - - '100000' - - '32' - input_idx: 184 + - '10000' + - '64' + input_idx: 176 common: repetitions: 2 timeout: null @@ -10999,32 +10735,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 100000 - mode: deterministic - threads: 8 + dtype: 64 + length: 10000 + mode: serial 
+ threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '100000' - - '32' - input_idx: 185 + - '10000' + - '64' + input_idx: 177 common: repetitions: 2 timeout: null @@ -11033,32 +10769,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - 
length: 100000 - mode: nondeterministic + dtype: 64 + length: 10000 + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '100000' - - '32' - input_idx: 186 + - '10000' + - '64' + input_idx: 178 common: repetitions: 2 timeout: null @@ -11067,32 +10803,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 values: - dtype: 32 - length: 100000 - mode: nondeterministic + dtype: 64 + length: 10000 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100000' - - '32' - input_idx: 187 + - '10000' + - '64' + input_idx: 179 common: repetitions: 2 timeout: null @@ -11101,32 +10837,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 100000 - mode: nondeterministic + dtype: 64 + length: 10000 + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '100000' - - '32' - input_idx: 188 + - '10000' + - '64' + input_idx: 180 common: repetitions: 2 timeout: null @@ -11135,32 +10871,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd 
executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 100000 - mode: nondeterministic + dtype: 64 + length: 10000 + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '100000' - - '32' - input_idx: 189 + - '10000' + - '64' + input_idx: 181 common: repetitions: 2 timeout: null @@ -11169,32 +10905,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + uid: 
vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 values: - dtype: 32 - length: 100000 - mode: nondeterministic + dtype: 64 + length: 10000 + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '100000' - - '32' - input_idx: 190 + - '10000' + - '64' + input_idx: 182 common: repetitions: 2 timeout: null @@ -11203,32 +10939,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: 
vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: - dtype: 32 - length: 100000 - mode: nondeterministic - threads: 4 + dtype: 64 + length: 10000 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100000' - - '32' - input_idx: 191 + - '10000' + - '64' + input_idx: 183 common: repetitions: 2 timeout: null @@ -11237,32 +10973,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: - dtype: 32 - length: 100000 - mode: nondeterministic - threads: 4 - run_ahead: 10 + dtype: 64 + length: 10000 + mode: deterministic + threads: 8 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '100000' - - '32' - input_idx: 192 + - '10000' + - '64' + input_idx: 184 common: repetitions: 2 timeout: null @@ -11271,32 +11007,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 values: - dtype: 32 - length: 100000 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 + dtype: 64 + length: 10000 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100000' - - '32' - input_idx: 193 + - '10000' + - '64' + input_idx: 185 common: repetitions: 2 timeout: null @@ -11305,32 +11041,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 100000 + dtype: 64 + length: 10000 mode: nondeterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '100000' - - '32' - input_idx: 194 + - '10000' + - '64' + input_idx: 186 common: repetitions: 2 timeout: null @@ -11339,32 +11075,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 100000 + dtype: 64 + length: 10000 mode: nondeterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '100000' - - '32' - input_idx: 195 + - '10000' + - '64' + input_idx: 187 common: repetitions: 2 
timeout: null @@ -11373,32 +11109,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 values: - dtype: 32 - length: 100000 + dtype: 64 + length: 10000 mode: nondeterministic threads: 4 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 
memory_only: false args: - - '100000' - - '32' - input_idx: 196 + - '10000' + - '64' + input_idx: 188 common: repetitions: 2 timeout: null @@ -11407,32 +11143,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: - dtype: 32 - length: 100000 + dtype: 64 + length: 10000 
mode: nondeterministic threads: 4 run_ahead: 10 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100000' - - '32' - input_idx: 197 + - '10000' + - '64' + input_idx: 189 common: repetitions: 2 timeout: null @@ -11441,32 +11177,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: - dtype: 32 - length: 100000 + dtype: 64 + length: 10000 mode: nondeterministic - threads: 8 - run_ahead: 5 + threads: 4 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '100000' - - '32' - input_idx: 198 + - '10000' + - '64' + input_idx: 190 common: repetitions: 2 timeout: null @@ -11475,32 +11211,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 values: - dtype: 32 - length: 100000 + dtype: 64 + length: 10000 mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 + threads: 4 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100000' - - '32' - input_idx: 199 + - '10000' + - '64' + input_idx: 191 common: repetitions: 2 timeout: null @@ -11509,32 +11245,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: 
/home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: - dtype: 32 - length: 100000 + dtype: 64 + length: 10000 mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '100000' - - '32' - input_idx: 200 + - '10000' + - '64' + input_idx: 192 common: repetitions: 2 timeout: null @@ -11543,32 +11279,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: 
vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: - dtype: 32 - length: 100000 + dtype: 64 + length: 10000 mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '100000' - - '32' - input_idx: 201 + - '10000' + - '64' + input_idx: 193 common: repetitions: 2 timeout: null @@ -11577,32 +11313,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: 
vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 values: - dtype: 32 - length: 100000 + dtype: 64 + length: 10000 mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '100000' - - '32' - input_idx: 202 + - '10000' + - '64' + input_idx: 194 common: repetitions: 2 timeout: null @@ -11611,32 +11347,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: - dtype: 32 - length: 100000 + dtype: 64 + length: 10000 mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100000' - - '32' - input_idx: 203 + - '10000' + - '64' + input_idx: 195 common: repetitions: 2 timeout: null @@ -11645,32 +11381,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: - dtype: 32 - length: 100000 + dtype: 64 + length: 10000 mode: nondeterministic threads: 8 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '100000' - - '32' - input_idx: 204 + - '10000' + - '64' + input_idx: 196 common: repetitions: 2 timeout: null @@ -11679,32 +11415,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 values: - dtype: 32 - length: 100000 + dtype: 64 + length: 10000 mode: nondeterministic threads: 8 run_ahead: 10 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100000' - - '32' - input_idx: 205 + - '10000' + - '64' + input_idx: 197 common: repetitions: 2 timeout: null @@ -11713,32 +11449,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 100000 - mode: nondeterministic - threads: 8 - run_ahead: 10 + dtype: 64 + length: 20000 + mode: serial + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '100000' - - '32' - input_idx: 206 + - '20000' + - '64' + input_idx: 198 common: repetitions: 2 timeout: 
null @@ -11747,32 +11483,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 100000 - mode: nondeterministic - threads: 8 - run_ahead: 10 + dtype: 64 + length: 20000 + mode: serial + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: 
true args: - - '100000' - - '32' - input_idx: 207 + - '20000' + - '64' + input_idx: 199 common: repetitions: 2 timeout: null @@ -11781,32 +11517,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 100000 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - 
cores_per_cluster: 1 + dtype: 64 + length: 20000 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '100000' - - '32' - input_idx: 208 + - '20000' + - '64' + input_idx: 200 common: repetitions: 2 timeout: null @@ -11815,32 +11551,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 values: - dtype: 32 - length: 100000 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 + dtype: 64 + length: 20000 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100000' - - '32' - input_idx: 209 + - '20000' + - '64' + input_idx: 201 common: repetitions: 2 timeout: null @@ -11849,32 +11585,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 500000 - mode: serial + dtype: 64 + length: 20000 + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '500000' - - '32' - input_idx: 210 + - '20000' + - '64' + input_idx: 202 common: repetitions: 2 timeout: null @@ -11883,32 +11619,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: 
/home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 500000 - mode: serial + dtype: 64 + length: 20000 + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '500000' - - '32' - input_idx: 211 + - '20000' + - '64' + input_idx: 203 common: repetitions: 2 timeout: null @@ -11917,32 +11653,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 path: 
/home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 values: - dtype: 32 - length: 500000 - mode: serial + dtype: 64 + length: 20000 + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '500000' - - '32' - input_idx: 212 + - '20000' + - '64' + input_idx: 204 common: repetitions: 2 timeout: null @@ -11951,32 +11687,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: 
vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: - dtype: 32 - length: 500000 - mode: serial - threads: 4 + dtype: 64 + length: 20000 + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '500000' - - '32' - input_idx: 213 + - '20000' + - '64' + input_idx: 205 common: repetitions: 2 timeout: null @@ -11985,32 +11721,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: 
vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: - dtype: 32 - length: 500000 - mode: serial - threads: 4 + dtype: 64 + length: 20000 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '500000' - - '32' - input_idx: 214 + - '20000' + - '64' + input_idx: 206 common: repetitions: 2 timeout: null @@ -12019,32 +11755,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 values: - dtype: 32 - length: 500000 - mode: serial - threads: 4 + dtype: 64 + length: 20000 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '500000' - - '32' - input_idx: 215 + - '20000' + - '64' + input_idx: 207 common: repetitions: 2 timeout: null @@ -12053,32 +11789,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8/simulate + 
traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 500000 - mode: deterministic + dtype: 64 + length: 20000 + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '500000' - - '32' - input_idx: 216 + - '20000' + - '64' + input_idx: 208 common: repetitions: 2 timeout: null @@ -12087,32 +11823,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 500000 - mode: deterministic + dtype: 64 + length: 20000 + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '500000' - - '32' - input_idx: 217 + - '20000' + - '64' + input_idx: 209 common: repetitions: 2 timeout: null @@ -12121,32 +11857,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 values: - dtype: 32 - length: 500000 - mode: deterministic + dtype: 64 + length: 20000 + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '500000' - - '32' - input_idx: 218 + - '20000' + - '64' + input_idx: 210 common: repetitions: 2 timeout: null @@ -12155,32 +11891,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: - dtype: 32 - length: 500000 - mode: deterministic + dtype: 64 + length: 20000 + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '500000' - - '32' - input_idx: 219 + - '20000' + - '64' + input_idx: 211 common: 
repetitions: 2 timeout: null @@ -12189,32 +11925,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: - dtype: 32 - length: 500000 - mode: deterministic + dtype: 64 + length: 20000 + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + 
run_ahead: 10 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '500000' - - '32' - input_idx: 220 + - '20000' + - '64' + input_idx: 212 common: repetitions: 2 timeout: null @@ -12223,32 +11959,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 values: - dtype: 32 - 
length: 500000 - mode: deterministic + dtype: 64 + length: 20000 + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '500000' - - '32' - input_idx: 221 + - '20000' + - '64' + input_idx: 213 common: repetitions: 2 timeout: null @@ -12257,32 +11993,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: - dtype: 32 - length: 500000 - mode: deterministic + dtype: 64 + length: 20000 + mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '500000' - - '32' - input_idx: 222 + - '20000' + - '64' + input_idx: 214 common: repetitions: 2 timeout: null @@ -12291,32 +12027,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: - dtype: 32 - length: 500000 - mode: deterministic + dtype: 64 + length: 20000 + mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '500000' - - '32' - input_idx: 223 + - '20000' + - '64' + input_idx: 215 common: repetitions: 2 timeout: null @@ -12325,32 +12061,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: 
./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 values: - dtype: 32 - length: 500000 - mode: deterministic + dtype: 64 + length: 20000 + mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '500000' - - '32' - input_idx: 224 + - '20000' + - '64' + input_idx: 216 common: repetitions: 2 timeout: null @@ -12359,32 +12095,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: 
vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: - dtype: 32 - length: 500000 - mode: deterministic + dtype: 64 + length: 20000 + mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '500000' - - '32' - input_idx: 225 + - '20000' + - '64' + input_idx: 217 common: repetitions: 2 timeout: null @@ -12393,32 +12129,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd 
benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: - dtype: 32 - length: 500000 - mode: deterministic + dtype: 64 + length: 20000 + mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + run_ahead: 10 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '500000' - - '32' - input_idx: 226 + - '20000' + - '64' + input_idx: 218 common: repetitions: 2 timeout: null @@ -12427,32 +12163,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 values: - dtype: 32 - length: 500000 - mode: deterministic + dtype: 64 + length: 20000 + mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '500000' - - '32' - input_idx: 227 + - '20000' + - '64' + input_idx: 219 common: repetitions: 2 timeout: null @@ -12461,32 +12197,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 500000 - mode: nondeterministic + dtype: 64 + length: 100000 + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '500000' - - '32' - input_idx: 228 + - '100000' + - '64' + input_idx: 220 common: repetitions: 2 timeout: null @@ -12495,32 +12231,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 500000 - mode: nondeterministic + dtype: 64 + length: 100000 + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '500000' - - '32' - input_idx: 229 + - '100000' + - '64' + input_idx: 221 common: repetitions: 2 timeout: null @@ -12529,32 +12265,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 500000 - mode: nondeterministic + dtype: 64 + length: 100000 + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '500000' - - '32' - input_idx: 230 + - '100000' + - '64' + input_idx: 222 common: repetitions: 2 timeout: null @@ -12563,32 +12299,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 values: - dtype: 32 - length: 500000 - mode: nondeterministic + dtype: 64 + length: 100000 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '500000' - - '32' - input_idx: 231 + - '100000' + - '64' + input_idx: 223 common: repetitions: 2 
timeout: null @@ -12597,32 +12333,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 500000 - mode: nondeterministic + dtype: 64 + length: 100000 + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: 
false args: - - '500000' - - '32' - input_idx: 232 + - '100000' + - '64' + input_idx: 224 common: repetitions: 2 timeout: null @@ -12631,32 +12367,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 500000 - mode: nondeterministic + dtype: 64 + length: 100000 
+ mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '500000' - - '32' - input_idx: 233 + - '100000' + - '64' + input_idx: 225 common: repetitions: 2 timeout: null @@ -12665,32 +12401,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 values: - dtype: 32 - length: 500000 - mode: nondeterministic + dtype: 64 + length: 100000 + mode: deterministic threads: 4 - run_ahead: 10 - num_clusters: 28 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '500000' - - '32' - input_idx: 234 + - '100000' + - '64' + input_idx: 226 common: repetitions: 2 timeout: null @@ -12699,32 +12435,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: - dtype: 32 - length: 500000 - mode: nondeterministic - threads: 4 - run_ahead: 10 + dtype: 64 + length: 100000 + mode: deterministic + threads: 8 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '500000' - - '32' - input_idx: 235 + - '100000' + - '64' + input_idx: 227 common: repetitions: 2 timeout: null @@ -12733,32 +12469,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: 
/home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: - dtype: 32 - length: 500000 - mode: nondeterministic - threads: 4 - run_ahead: 10 + dtype: 64 + length: 100000 + mode: deterministic + threads: 8 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '500000' - - '32' - input_idx: 236 + - '100000' + - '64' + input_idx: 228 common: repetitions: 2 timeout: null @@ -12767,32 +12503,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: 
vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 values: - dtype: 32 - length: 500000 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + dtype: 64 + length: 100000 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '500000' - - '32' - input_idx: 237 + - '100000' + - '64' + input_idx: 229 common: repetitions: 2 timeout: null @@ -12801,32 +12537,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 500000 + dtype: 64 + length: 100000 mode: nondeterministic threads: 4 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '500000' - - '32' - input_idx: 238 + - '100000' + - '64' + input_idx: 230 common: repetitions: 2 timeout: null @@ -12835,33 +12571,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 - length: 500000 + dtype: 64 + length: 100000 mode: nondeterministic threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '500000' - - '32' - input_idx: 239 - common: + - '100000' + - '64' + input_idx: 231 + common: repetitions: 2 timeout: null concurrency: 1 @@ -12869,32 +12605,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 values: - dtype: 32 - length: 500000 + dtype: 64 + length: 100000 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '500000' - - '32' - input_idx: 240 + - '100000' + - '64' + input_idx: 232 common: repetitions: 2 timeout: null @@ -12903,32 +12639,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: - dtype: 32 - length: 500000 + dtype: 64 + length: 100000 mode: nondeterministic - threads: 8 - run_ahead: 5 + threads: 4 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '500000' - - '32' - input_idx: 241 + - '100000' + - '64' + input_idx: 233 common: repetitions: 2 
timeout: null @@ -12937,32 +12673,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: - dtype: 32 - length: 500000 + dtype: 64 + length: 100000 mode: nondeterministic - threads: 8 - run_ahead: 5 + threads: 4 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + 
cores_per_cluster: 4 memory_only: false args: - - '500000' - - '32' - input_idx: 242 + - '100000' + - '64' + input_idx: 234 common: repetitions: 2 timeout: null @@ -12971,32 +12707,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 values: - dtype: 32 - length: 500000 + 
dtype: 64 + length: 100000 mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + threads: 4 + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '500000' - - '32' - input_idx: 243 + - '100000' + - '64' + input_idx: 235 common: repetitions: 2 timeout: null @@ -13005,32 +12741,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: - dtype: 32 - length: 500000 + dtype: 64 + length: 100000 mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '500000' - - '32' - input_idx: 244 + - '100000' + - '64' + input_idx: 236 common: repetitions: 2 timeout: null @@ -13039,32 +12775,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: - dtype: 32 - length: 500000 + dtype: 64 + length: 100000 mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '500000' - - '32' - input_idx: 245 + - '100000' + - '64' + input_idx: 237 common: repetitions: 2 timeout: null @@ -13073,32 +12809,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 path: 
/home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 values: - dtype: 32 - length: 500000 + dtype: 64 + length: 100000 mode: nondeterministic threads: 8 - run_ahead: 10 - num_clusters: 28 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '500000' - - '32' - input_idx: 246 + - '100000' + - '64' + input_idx: 238 common: repetitions: 2 timeout: null @@ -13107,32 +12843,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: 
vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: - dtype: 32 - length: 500000 + dtype: 64 + length: 100000 mode: nondeterministic threads: 8 run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '500000' - - '32' - input_idx: 247 + - '100000' + - '64' + input_idx: 239 common: repetitions: 2 timeout: null @@ -13141,32 +12877,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: 
vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: - dtype: 32 - length: 500000 + dtype: 64 + length: 100000 mode: nondeterministic threads: 8 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '500000' - - '32' - input_idx: 248 + - '100000' + - '64' + input_idx: 240 common: repetitions: 2 timeout: null @@ -13175,32 +12911,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 values: - dtype: 32 - length: 500000 + dtype: 64 + length: 100000 mode: nondeterministic threads: 8 run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '500000' - - '32' - input_idx: 249 + - '100000' + - '64' + input_idx: 241 common: repetitions: 2 timeout: null @@ -13209,32 +12945,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 + dtype: 64 length: 500000 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - '500000' - - '32' - input_idx: 250 + - '64' + input_idx: 242 common: repetitions: 2 timeout: null @@ -13243,32 +12979,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: - dtype: 32 + dtype: 64 length: 500000 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - '500000' - - '32' - input_idx: 251 + - '64' + input_idx: 243 common: repetitions: 2 timeout: null @@ -13277,32 +13013,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-32-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - 
traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 values: dtype: 64 - length: 100 + length: 500000 mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '100' + - '500000' - '64' - input_idx: 252 + input_idx: 244 common: repetitions: 2 timeout: null @@ -13311,32 +13047,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - 
traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4 values: dtype: 64 - length: 100 + length: 500000 mode: serial threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100' + - '500000' - '64' - input_idx: 253 + input_idx: 245 common: repetitions: 2 timeout: null @@ -13345,32 +13081,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 64 - length: 100 - mode: serial + length: 500000 + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '100' + - '500000' - '64' - input_idx: 254 + input_idx: 246 common: repetitions: 2 timeout: null @@ -13379,32 +13115,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 64 - length: 100 - mode: serial + length: 500000 + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '100' + - '500000' - '64' - input_idx: 255 + input_idx: 247 common: repetitions: 2 timeout: null @@ -13413,32 +13149,32 @@ benchmarks: 
results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4 values: dtype: 64 - length: 100 - mode: serial + length: 500000 + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '100' + - '500000' - '64' - input_idx: 256 + input_idx: 248 common: repetitions: 2 timeout: null 
@@ -13447,32 +13183,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 64 - length: 100 - mode: serial - threads: 4 + length: 500000 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100' + - '500000' - '64' 
- input_idx: 257 + input_idx: 249 common: repetitions: 2 timeout: null @@ -13481,32 +13217,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 64 - length: 100 + length: 500000 mode: deterministic - threads: 4 + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 
memory_only: false args: - - '100' + - '500000' - '64' - input_idx: 258 + input_idx: 250 common: repetitions: 2 timeout: null @@ -13515,32 +13251,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8 values: dtype: 64 - length: 100 + length: 500000 mode: deterministic - threads: 4 + threads: 8 run_ahead: 5 - 
num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100' + - '500000' - '64' - input_idx: 259 + input_idx: 251 common: repetitions: 2 timeout: null @@ -13549,32 +13285,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 64 - length: 
100 - mode: deterministic + length: 500000 + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '100' + - '500000' - '64' - input_idx: 260 + input_idx: 252 common: repetitions: 2 timeout: null @@ -13583,32 +13319,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 values: dtype: 64 - length: 100 - mode: deterministic + length: 500000 + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '100' + - '500000' - '64' - input_idx: 261 + input_idx: 253 common: repetitions: 2 timeout: null @@ -13617,32 +13353,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4 values: dtype: 64 - length: 100 - mode: deterministic + length: 500000 + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '100' + - '500000' - '64' - input_idx: 262 + input_idx: 254 common: repetitions: 2 timeout: null @@ -13651,32 +13387,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled 
executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: dtype: 64 - length: 100 - mode: deterministic + length: 500000 + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100' + - '500000' - '64' - input_idx: 263 + input_idx: 255 common: repetitions: 2 timeout: null @@ -13685,32 +13421,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: 
vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 values: dtype: 64 - length: 100 - mode: deterministic - threads: 8 - run_ahead: 5 + length: 500000 + mode: nondeterministic + threads: 4 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '100' + - '500000' - '64' - input_idx: 264 + input_idx: 256 common: repetitions: 2 timeout: null @@ -13719,32 +13455,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: 
vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4 values: dtype: 64 - length: 100 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 + length: 500000 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100' + - '500000' - '64' - input_idx: 265 + input_idx: 257 common: repetitions: 2 timeout: null @@ -13753,32 +13489,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 64 - length: 100 - mode: deterministic + length: 500000 + mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '100' + - '500000' - '64' - input_idx: 266 + input_idx: 258 common: repetitions: 2 timeout: null @@ -13787,32 +13523,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 values: dtype: 64 - length: 100 - mode: deterministic + length: 500000 + mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '100' + - '500000' - '64' - input_idx: 267 + input_idx: 259 common: repetitions: 2 timeout: null @@ -13821,32 +13557,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8 values: dtype: 64 - length: 100 - mode: deterministic + length: 500000 + mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '100' + - '500000' - '64' - input_idx: 268 + input_idx: 260 common: repetitions: 2 timeout: null @@ -13855,32 +13591,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: dtype: 64 - length: 100 - mode: deterministic + length: 500000 + mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + run_ahead: 10 + num_clusters: 28 + cores_per_cluster: 1 + memory_only: false args: - - '100' + - '500000' - '64' - input_idx: 269 + input_idx: 261 common: repetitions: 2 timeout: null @@ -13889,32 +13625,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 values: dtype: 64 - length: 100 + length: 500000 mode: nondeterministic - threads: 4 - run_ahead: 5 + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '100' + - '500000' - '64' - input_idx: 270 + input_idx: 262 common: repetitions: 2 timeout: null @@ -13923,32 +13659,32 @@ benchmarks: 
results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-4-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - name: vectorAdd benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/vectoradd rel_path: ./vectoradd executable: vectoradd_l1_enabled executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8 values: dtype: 64 - length: 100 + length: 500000 mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 + threads: 8 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100' + - 
'500000' - '64' - input_idx: 271 + input_idx: 263 common: repetitions: 2 timeout: null @@ -13957,32 +13693,37 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace + accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + simple_matrixmul: + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 100 - mode: nondeterministic + dtype: 32 + m: 32 + n: 32 + p: 32 + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '100' - - '64' - input_idx: 272 + - '32' + - '32' + - '32' + - '32' + input_idx: 0 common: repetitions: 2 timeout: null @@ -13991,32 +13732,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 100 - mode: nondeterministic + dtype: 32 + m: 32 + n: 32 + p: 32 + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: true args: - - '100' - - '64' - input_idx: 273 + - '32' + - '32' + - '32' + - '32' + input_idx: 1 common: repetitions: 2 timeout: null @@ -14025,32 +13770,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace 
parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 100 - mode: nondeterministic + dtype: 32 + m: 32 + n: 32 + p: 32 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '100' - - '64' - input_idx: 274 + - '32' + - '32' + - '32' + - '32' + input_idx: 2 common: repetitions: 2 timeout: null @@ -14059,32 +13808,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-112-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-112-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 100 - mode: nondeterministic + dtype: 32 + m: 32 + n: 32 + p: 32 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100' - - '64' - input_idx: 275 + - '32' + - '32' + - '32' + - '32' + input_idx: 3 common: repetitions: 2 timeout: 
null @@ -14093,32 +13846,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 100 - mode: nondeterministic + dtype: 32 + m: 32 + n: 32 + p: 32 + mode: deterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '100' - - '64' - input_idx: 276 + - '32' + - '32' + - '32' + - '32' + input_idx: 4 common: repetitions: 2 timeout: null @@ -14127,32 +13884,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 100 - mode: nondeterministic + dtype: 32 + m: 32 + n: 32 + p: 32 + mode: deterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '100' - - '64' - input_idx: 277 + - '32' + - '32' + - '32' + - '32' + input_idx: 5 common: repetitions: 2 timeout: null @@ -14161,32 +13922,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 100 - mode: nondeterministic + dtype: 32 + m: 32 + n: 32 + p: 32 + mode: deterministic threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '100' - - '64' - input_idx: 278 + - '32' + - '32' + - '32' + - '32' + input_idx: 6 common: repetitions: 2 timeout: null @@ -14195,66 +13960,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 100 - mode: nondeterministic - threads: 4 - run_ahead: 10 + dtype: 32 + m: 32 + n: 32 + p: 32 + mode: deterministic + threads: 8 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - 
args: - - '100' - - '64' - input_idx: 279 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace - parallel: null - l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 - values: - dtype: 64 - length: 100 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 cores_per_cluster: 1 memory_only: false args: - - '100' - - '64' - input_idx: 280 + - '32' + - '32' + - '32' + - '32' + input_idx: 7 common: repetitions: 2 timeout: null @@ -14263,32 +13998,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 100 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + dtype: 32 + m: 32 + n: 32 + p: 32 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '100' - - '64' - input_idx: 281 + - '32' + - '32' + - '32' + - '32' + input_idx: 8 common: repetitions: 2 timeout: null @@ 
-14297,32 +14036,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 100 - mode: nondeterministic + dtype: 32 + m: 32 + n: 32 + p: 32 + mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '100' - - '64' - input_idx: 282 + - '32' + - '32' + - '32' + - '32' + input_idx: 9 common: repetitions: 2 timeout: null @@ -14331,32 +14074,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 100 + dtype: 32 + m: 32 + n: 32 + p: 32 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100' - - '64' - input_idx: 283 + - '32' + - '32' + - '32' + - '32' + input_idx: 10 common: repetitions: 2 timeout: null @@ -14365,32 +14112,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 100 + dtype: 32 + m: 32 + n: 32 + p: 32 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '100' - - '64' - input_idx: 284 + - '32' + - '32' + - '32' + - '32' + input_idx: 11 common: repetitions: 2 timeout: null @@ -14399,32 +14150,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 100 + dtype: 32 + m: 32 + n: 32 + p: 32 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '100' - - '64' - input_idx: 285 + - '32' 
+ - '32' + - '32' + - '32' + input_idx: 12 common: repetitions: 2 timeout: null @@ -14433,32 +14188,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 values: - dtype: 64 - length: 100 + dtype: 32 + m: 32 + n: 32 + p: 32 mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 + threads: 4 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '100' - - '64' - input_idx: 286 + - '32' + - '32' + - '32' + - '32' + input_idx: 13 common: repetitions: 2 timeout: null @@ -14467,32 +14226,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 values: - dtype: 64 - length: 100 + dtype: 32 + m: 32 + n: 32 + p: 32 mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + threads: 4 + run_ahead: 10 + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '100' - - '64' - input_idx: 287 + - '32' + - '32' + - '32' + - '32' + input_idx: 14 common: repetitions: 2 timeout: null @@ -14501,32 +14264,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-10-threads-4 values: - dtype: 64 - length: 100 + dtype: 32 + m: 32 + n: 32 + p: 32 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 10 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '100' - - '64' - input_idx: 288 + - '32' + - '32' + - '32' + - '32' + input_idx: 15 common: repetitions: 2 timeout: null @@ -14535,32 +14302,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace 
- accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 100 + dtype: 32 + m: 32 + n: 32 + p: 32 mode: nondeterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100' - - '64' - input_idx: 289 + - '32' + - '32' + - '32' + - '32' + input_idx: 
16 common: repetitions: 2 timeout: null @@ -14569,32 +14340,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 100 + dtype: 32 + m: 32 + n: 32 + p: 32 mode: nondeterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '100' - - '64' - input_idx: 290 + - '32' + - '32' + - '32' + - '32' + input_idx: 17 common: repetitions: 2 timeout: null @@ -14603,32 +14378,74 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 100 + dtype: 32 + m: 32 + n: 32 + p: 32 + mode: nondeterministic + threads: 8 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false + args: + - '32' + - '32' + - '32' + - '32' + input_idx: 18 + common: + repetitions: 2 + timeout: null + concurrency: 1 + enabled: null + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + parallel: null + l2_prefill: null + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 + values: + dtype: 32 + m: 32 + n: 32 + p: 32 mode: nondeterministic threads: 8 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '100' - - '64' - input_idx: 291 + - '32' + - '32' + - '32' + - '32' + input_idx: 19 common: repetitions: 2 timeout: null @@ -14637,32 +14454,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 values: - dtype: 64 - length: 100 + dtype: 32 + m: 32 + n: 32 + p: 32 mode: nondeterministic threads: 8 run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '100' - - '64' - input_idx: 292 + - '32' + - '32' + - '32' + - '32' + input_idx: 20 common: repetitions: 2 timeout: null @@ -14671,32 +14492,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-10-threads-8 values: - dtype: 64 - length: 100 + dtype: 32 + m: 32 + n: 32 + p: 32 mode: nondeterministic threads: 8 run_ahead: 10 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100' - - '64' - input_idx: 293 + - '32' + - '32' + - '32' + - '32' + input_idx: 21 common: repetitions: 2 timeout: null @@ -14705,22 +14530,24 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 64 mode: serial threads: 4 run_ahead: 5 @@ -14728,9 +14555,11 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '1000' + - '32' + - '32' - '64' - input_idx: 294 + - '32' + input_idx: 22 
common: repetitions: 2 timeout: null @@ -14739,22 +14568,24 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 64 mode: serial threads: 4 run_ahead: 5 @@ -14762,9 +14593,11 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '1000' + - '32' + - '32' - '64' - input_idx: 295 + - '32' + input_idx: 23 common: repetitions: 2 timeout: null @@ -14773,32 +14606,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 
+ uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 64 mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '1000' + - '32' + - '32' - '64' - input_idx: 296 + - '32' + input_idx: 24 common: repetitions: 2 timeout: null @@ -14807,32 +14644,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: 
./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-112-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-112-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 64 mode: serial threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '1000' + - '32' + - '32' - '64' - input_idx: 297 + - '32' + input_idx: 25 common: repetitions: 2 timeout: null @@ -14841,32 +14682,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 1000 - mode: serial + dtype: 32 + m: 32 + n: 32 + p: 64 + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '1000' + - '32' + - '32' - '64' - input_idx: 298 + - '32' + input_idx: 26 common: repetitions: 2 timeout: null @@ -14875,32 +14720,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 1000 - mode: serial + dtype: 32 + m: 32 + n: 32 + p: 64 + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false 
args: - - '1000' + - '32' + - '32' - '64' - input_idx: 299 + - '32' + input_idx: 27 common: repetitions: 2 timeout: null @@ -14909,32 +14758,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + 
results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 64 mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '1000' + - '32' + - '32' - '64' - input_idx: 300 + - '32' + input_idx: 28 common: repetitions: 2 timeout: null @@ -14943,32 +14796,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 64 mode: deterministic - threads: 4 + threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '1000' + - '32' + - '32' - '64' - input_idx: 301 + - '32' + input_idx: 29 common: repetitions: 2 timeout: null @@ -14977,32 +14834,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace 
parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 64 mode: deterministic - threads: 4 + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '1000' + - '32' + - '32' - '64' - input_idx: 302 + - '32' + input_idx: 30 common: repetitions: 2 timeout: null @@ -15011,32 +14872,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 64 mode: deterministic - threads: 4 + threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '1000' + - '32' + - '32' - '64' - input_idx: 303 + - '32' + input_idx: 31 common: repetitions: 2 timeout: null @@ -15045,32 +14910,36 @@ benchmarks: results_dir: 
/home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 1000 - mode: deterministic + dtype: 32 + m: 32 + n: 32 + p: 64 + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '1000' + - '32' + - '32' - '64' - input_idx: 304 + - '32' + input_idx: 32 common: repetitions: 2 timeout: null @@ -15079,32 +14948,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 1000 - mode: deterministic + dtype: 32 + m: 32 + n: 32 + p: 64 + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '1000' + - '32' + - '32' - '64' - input_idx: 305 + - '32' + input_idx: 33 common: repetitions: 2 timeout: null @@ -15113,32 +14986,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 1000 - mode: deterministic - threads: 8 + dtype: 32 + m: 32 + n: 32 + p: 64 + mode: nondeterministic + threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '1000' + - '32' + - '32' - '64' - input_idx: 306 + - '32' + input_idx: 34 common: repetitions: 2 timeout: null @@ -15147,32 +15024,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 values: - dtype: 64 - length: 1000 - mode: deterministic - threads: 8 - run_ahead: 5 + dtype: 32 + m: 32 + n: 32 + p: 64 + mode: nondeterministic + threads: 4 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true 
+ memory_only: false args: - - '1000' + - '32' + - '32' - '64' - input_idx: 307 + - '32' + input_idx: 35 common: repetitions: 2 timeout: null @@ -15181,32 +15062,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 values: - dtype: 64 - length: 1000 - mode: deterministic - threads: 8 - run_ahead: 5 + dtype: 32 + m: 32 + n: 32 + p: 64 + mode: nondeterministic + threads: 4 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '1000' + - '32' + - '32' - '64' - input_idx: 308 + - '32' + input_idx: 36 common: repetitions: 2 timeout: null @@ -15215,32 +15100,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-10-threads-4 values: - dtype: 64 - length: 1000 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + dtype: 32 + m: 32 + n: 32 + p: 64 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '1000' + - '32' + - '32' - '64' - input_idx: 309 + - '32' + input_idx: 37 common: repetitions: 2 timeout: null @@ -15249,32 +15138,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + 
accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 1000 - mode: deterministic + dtype: 32 + m: 32 + n: 32 + p: 64 + mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '1000' + - '32' + - '32' - '64' - input_idx: 310 + - '32' + input_idx: 38 common: repetitions: 2 timeout: null @@ -15283,32 +15176,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 1000 - mode: deterministic + dtype: 32 + m: 32 + n: 32 + p: 64 + mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + 
cores_per_cluster: 4 + memory_only: false args: - - '1000' + - '32' + - '32' - '64' - input_idx: 311 + - '32' + input_idx: 39 common: repetitions: 2 timeout: null @@ -15317,32 +15214,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 64 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '1000' + - '32' + - '32' - '64' - input_idx: 312 + - '32' + input_idx: 40 common: repetitions: 2 timeout: null @@ -15351,32 +15252,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 64 mode: nondeterministic - threads: 4 - run_ahead: 5 + threads: 8 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '1000' + - '32' + - '32' - '64' - input_idx: 313 + - '32' + input_idx: 41 common: repetitions: 2 timeout: null @@ -15385,32 +15290,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 64 mode: nondeterministic - threads: 4 - run_ahead: 5 + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '1000' + - '32' + - '32' - '64' - input_idx: 314 + - '32' + input_idx: 42 common: repetitions: 2 timeout: null @@ -15419,32 +15328,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-10-threads-8 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 64 mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + threads: 8 + run_ahead: 10 + 
num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '1000' + - '32' + - '32' - '64' - input_idx: 315 + - '32' + input_idx: 43 common: repetitions: 2 timeout: null @@ -15453,32 +15366,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 1000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 32 + p: 128 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '1000' - - '64' - input_idx: 316 + - '32' + - '32' + - '128' + - '32' + input_idx: 44 common: repetitions: 2 timeout: null @@ -15487,32 +15404,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 1000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 32 + p: 128 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '1000' - - '64' - input_idx: 317 + - '32' + - '32' + - '128' + - '32' + input_idx: 45 common: repetitions: 2 timeout: null @@ -15521,32 +15442,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 1000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 32 + p: 128 + mode: serial threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '1000' - - '64' - input_idx: 318 + - '32' + - '32' + - '128' + - '32' + input_idx: 46 common: repetitions: 2 timeout: null @@ -15555,32 +15480,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-112-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-112-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 1000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 32 + p: 128 + mode: serial threads: 4 - run_ahead: 10 - num_clusters: 28 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: 
false args: - - '1000' - - '64' - input_idx: 319 + - '32' + - '32' + - '128' + - '32' + input_idx: 47 common: repetitions: 2 timeout: null @@ -15589,32 +15518,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 1000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 32 + p: 128 + mode: deterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '1000' - - '64' - input_idx: 320 + - '32' + - '32' + - '128' + - '32' + input_idx: 48 common: repetitions: 2 timeout: null @@ -15623,32 +15556,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 1000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 32 + p: 128 + mode: deterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '1000' - - '64' - input_idx: 321 + - '32' + - '32' + - '128' + - '32' + input_idx: 49 common: repetitions: 2 timeout: null @@ -15657,32 +15594,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 1000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 32 + p: 128 + mode: deterministic threads: 4 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '1000' - - '64' - input_idx: 322 + - '32' + - '32' + - '128' + - '32' + input_idx: 50 common: repetitions: 2 timeout: null @@ -15691,32 +15632,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 values: - dtype: 64 - length: 1000 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 + dtype: 32 + m: 32 + n: 32 + p: 128 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 28 
cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '1000' - - '64' - input_idx: 323 + - '32' + - '32' + - '128' + - '32' + input_idx: 51 common: repetitions: 2 timeout: null @@ -15725,32 +15670,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + 
executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 values: - dtype: 64 - length: 1000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 32 + p: 128 + mode: deterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '1000' - - '64' - input_idx: 324 + - '32' + - '32' + - '128' + - '32' + input_idx: 52 common: repetitions: 2 timeout: null @@ -15759,32 +15708,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - 
results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-8 values: - dtype: 64 - length: 1000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 32 + p: 128 + mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '1000' - - '64' - input_idx: 325 + - '32' + - '32' + - '128' + - '32' + input_idx: 53 common: repetitions: 2 timeout: null @@ -15793,32 +15746,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 128 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '1000' - - '64' - input_idx: 326 + - '32' + - '32' + - '128' + - '32' + input_idx: 54 common: repetitions: 2 timeout: null @@ -15827,66 +15784,74 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 128 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + 
memory_only: false args: - - '1000' - - '64' - input_idx: 327 - common: - repetitions: 2 - timeout: null + - '32' + - '32' + - '128' + - '32' + input_idx: 55 + common: + repetitions: 2 + timeout: null concurrency: 1 enabled: null results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + 
executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 128 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '1000' - - '64' - input_idx: 328 + - '32' + - '32' + - '128' + - '32' + input_idx: 56 common: repetitions: 2 timeout: null @@ -15895,32 +15860,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 128 mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 + threads: 4 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '1000' - - '64' - input_idx: 329 + - '32' + - '32' + - '128' + - '32' + input_idx: 57 common: repetitions: 2 timeout: null @@ -15929,32 +15898,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 128 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '1000' - - '64' - input_idx: 330 + - '32' + - '32' + - '128' + - '32' + input_idx: 58 common: repetitions: 2 timeout: null @@ -15963,32 +15936,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-10-threads-4 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 128 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 10 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + 
memory_only: false args: - - '1000' - - '64' - input_idx: 331 + - '32' + - '32' + - '128' + - '32' + input_idx: 59 common: repetitions: 2 timeout: null @@ -15997,32 +15974,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 128 mode: nondeterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '1000' - - '64' - input_idx: 332 + - '32' + - '32' + - '128' + - '32' + input_idx: 60 common: repetitions: 2 timeout: null @@ -16031,32 +16012,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 128 mode: nondeterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '1000' - - '64' - input_idx: 333 + - '32' + - '32' + - '128' + - '32' + input_idx: 61 common: repetitions: 2 timeout: null @@ -16065,32 +16050,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-8 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 128 mode: nondeterministic threads: 8 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '1000' - - '64' - input_idx: 334 + - '32' + - '32' + - '128' + - '32' + input_idx: 62 common: repetitions: 2 timeout: null @@ -16099,32 +16088,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 values: - dtype: 64 - length: 1000 + dtype: 32 + m: 32 + n: 32 + p: 128 mode: nondeterministic threads: 8 run_ahead: 10 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false 
args: - - '1000' - - '64' - input_idx: 335 + - '32' + - '32' + - '128' + - '32' + input_idx: 63 common: repetitions: 2 timeout: null @@ -16133,32 +16126,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-1000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-1000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 values: - dtype: 64 - length: 10000 - mode: serial - threads: 4 - run_ahead: 5 + dtype: 32 + m: 32 + n: 32 + p: 128 + mode: nondeterministic + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '10000' - - '64' - input_idx: 336 + - '32' + - '32' + - '128' + - '32' + input_idx: 64 common: repetitions: 2 timeout: null @@ -16167,32 +16164,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-10-threads-8 values: - dtype: 64 - length: 10000 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 + dtype: 32 + m: 32 + n: 32 + p: 128 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '10000' - - '64' - input_idx: 337 + - '32' + - '32' + - '128' + - '32' + input_idx: 65 common: repetitions: 2 timeout: null @@ -16201,32 +16202,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 32 mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 338 + - '32' + - '32' + input_idx: 66 common: repetitions: 2 timeout: null @@ -16235,32 +16240,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 32 mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: true args: - - '10000' + - '32' - '64' - input_idx: 339 + - '32' + - '32' + input_idx: 67 common: repetitions: 2 timeout: null @@ -16269,32 +16278,36 @@ benchmarks: results_dir: 
/home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 values: - 
dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 32 mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 340 + - '32' + - '32' + input_idx: 68 common: repetitions: 2 timeout: null @@ -16303,32 +16316,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-112-p-32-run_ahead-5-threads-4 + path: 
/home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-112-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 32 mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 341 + - '32' + - '32' + input_idx: 69 common: repetitions: 2 timeout: null @@ -16337,32 +16354,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: 
/home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - values: - dtype: 64 - length: 10000 - mode: deterministic - threads: 4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + values: + dtype: 32 + m: 32 + n: 64 + p: 32 + mode: deterministic + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 342 + - '32' + - '32' + input_idx: 70 common: repetitions: 2 timeout: null @@ -16371,32 +16392,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 32 mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 343 + - '32' + - '32' + input_idx: 71 common: repetitions: 2 timeout: null @@ -16405,32 +16430,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 32 mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 344 + - '32' + - '32' + input_idx: 72 common: 
repetitions: 2 timeout: null @@ -16439,32 +16468,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 32 mode: deterministic - threads: 4 + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 345 + - '32' + - '32' + input_idx: 73 common: repetitions: 2 timeout: null @@ -16473,32 +16506,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 32 mode: deterministic - threads: 4 + threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 346 + - '32' + - '32' + input_idx: 74 common: repetitions: 2 timeout: null @@ -16507,32 +16544,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 32 mode: deterministic - threads: 4 + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 347 + - '32' + - '32' + input_idx: 75 common: repetitions: 2 timeout: null @@ -16541,32 +16582,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 10000 - mode: deterministic - threads: 8 + dtype: 32 + m: 32 + n: 64 + p: 32 + mode: nondeterministic + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 348 + - '32' + - '32' + input_idx: 76 common: 
repetitions: 2 timeout: null @@ -16575,32 +16620,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 10000 - mode: deterministic - threads: 8 + dtype: 32 + m: 32 + n: 64 + p: 32 + mode: nondeterministic + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 349 + - '32' + - '32' + input_idx: 77 common: repetitions: 2 timeout: null @@ -16609,32 +16658,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 10000 - mode: deterministic - threads: 8 + dtype: 32 + m: 32 + n: 64 + p: 32 + mode: nondeterministic + threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 350 + - '32' + - '32' + input_idx: 78 common: repetitions: 2 timeout: null @@ -16643,32 +16696,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 values: - dtype: 64 - length: 10000 - mode: deterministic - threads: 8 - run_ahead: 5 + dtype: 32 + m: 32 + n: 64 + p: 32 + mode: nondeterministic + threads: 4 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 351 + - '32' + - '32' + input_idx: 79 common: repetitions: 2 timeout: null @@ -16677,32 +16734,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 values: - dtype: 64 - length: 10000 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + dtype: 32 + m: 32 + n: 64 + p: 32 + mode: nondeterministic + threads: 4 + run_ahead: 10 + 
num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 352 + - '32' + - '32' + input_idx: 80 common: repetitions: 2 timeout: null @@ -16711,32 +16772,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-10-threads-4 values: - dtype: 64 - length: 10000 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 + dtype: 32 + m: 32 + n: 64 + p: 32 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 353 + - '32' + - '32' + input_idx: 81 common: repetitions: 2 timeout: null @@ -16745,32 +16810,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: 
/home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 32 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 354 + - '32' + - '32' + input_idx: 82 common: repetitions: 2 timeout: null @@ -16779,32 +16848,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 32 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 355 + - '32' + - '32' + input_idx: 83 common: repetitions: 2 timeout: null @@ -16813,32 +16886,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 32 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 
memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 356 + - '32' + - '32' + input_idx: 84 common: repetitions: 2 timeout: null @@ -16847,32 +16924,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 32 mode: nondeterministic - threads: 4 - run_ahead: 5 + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 357 + - '32' + - '32' + input_idx: 85 common: repetitions: 2 timeout: null @@ -16881,32 +16962,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 32 mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + threads: 8 + run_ahead: 10 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 358 + - '32' + - '32' + input_idx: 86 common: repetitions: 2 timeout: null @@ -16915,32 +17000,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-10-threads-8 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 32 mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 + threads: 8 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 359 + - '32' + - '32' + input_idx: 87 common: repetitions: 2 timeout: null @@ -16949,32 +17038,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 10000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 64 + p: 64 + mode: serial threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '10000' + - '32' - 
'64' - input_idx: 360 + - '64' + - '32' + input_idx: 88 common: repetitions: 2 timeout: null @@ -16983,32 +17076,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 10000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 64 + p: 64 + mode: serial threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '10000' + - '32' - '64' - input_idx: 361 + - '64' + - '32' + input_idx: 89 common: repetitions: 2 timeout: null @@ -17017,32 +17114,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 10000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 64 + p: 64 + mode: serial threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 362 + - '64' + - '32' + input_idx: 90 common: repetitions: 2 timeout: null @@ -17051,32 +17152,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-112-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-112-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 10000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 64 + p: 64 + mode: serial threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 363 + - '64' + - '32' + input_idx: 91 common: repetitions: 2 timeout: null @@ -17085,32 +17190,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 10000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 64 + p: 64 + mode: deterministic threads: 4 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 memory_only: 
false args: - - '10000' + - '32' - '64' - input_idx: 364 + - '64' + - '32' + input_idx: 92 common: repetitions: 2 timeout: null @@ -17119,32 +17228,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 10000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 64 + p: 64 + mode: deterministic threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 365 + - '64' + - '32' + input_idx: 93 common: repetitions: 2 timeout: null @@ -17153,32 +17266,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - 
results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 10000 - mode: nondeterministic - threads: 8 + dtype: 32 + m: 32 + n: 64 + p: 64 + mode: deterministic + threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 366 + - '64' + - '32' + input_idx: 94 common: repetitions: 2 timeout: null @@ -17187,32 +17304,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 10000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 64 + p: 64 + mode: deterministic threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 367 + - '64' + - '32' + input_idx: 95 common: repetitions: 2 timeout: null @@ -17221,32 +17342,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 10000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 64 + p: 64 + mode: deterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 368 + - '64' + - '32' + input_idx: 96 
common: repetitions: 2 timeout: null @@ -17255,32 +17380,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 10000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 64 + p: 64 + mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 369 + - '64' + - '32' + input_idx: 97 common: repetitions: 2 timeout: null @@ -17289,32 +17418,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 64 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 370 + - '64' + - '32' + input_idx: 98 common: repetitions: 2 timeout: null @@ -17323,33 +17456,37 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 64 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 371 - common: + - '64' + - '32' + input_idx: 99 + common: repetitions: 2 timeout: null concurrency: 1 @@ -17357,32 +17494,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 64 mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 + threads: 4 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 
memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 372 + - '64' + - '32' + input_idx: 100 common: repetitions: 2 timeout: null @@ -17391,32 +17532,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 64 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 373 + - '64' + - '32' + input_idx: 101 common: repetitions: 2 timeout: null @@ -17425,32 +17570,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 64 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 374 + - '64' + - '32' + input_idx: 102 common: repetitions: 2 timeout: null @@ -17459,32 +17608,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-10-threads-4 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 64 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 375 + - '64' + - '32' + input_idx: 103 common: repetitions: 2 timeout: null @@ -17493,32 +17646,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 64 mode: nondeterministic threads: 8 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: 
- - '10000' + - '32' - '64' - input_idx: 376 + - '64' + - '32' + input_idx: 104 common: repetitions: 2 timeout: null @@ -17527,32 +17684,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 10000 + dtype: 32 + m: 32 + n: 64 + p: 64 mode: nondeterministic threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '10000' + - '32' - '64' - input_idx: 377 + - '64' + - '32' + input_idx: 105 common: repetitions: 2 timeout: null @@ -17561,32 +17722,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-10000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-10000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 20000 - mode: serial - threads: 4 + dtype: 32 + m: 32 + n: 64 + p: 64 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 378 + - '64' + - '32' + input_idx: 106 common: repetitions: 2 timeout: null @@ -17595,32 +17760,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 values: - dtype: 64 - length: 20000 - mode: serial - threads: 4 - run_ahead: 5 + dtype: 32 + m: 32 + n: 64 + p: 64 + mode: nondeterministic + threads: 8 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 379 + - '64' + - '32' + input_idx: 107 common: repetitions: 2 timeout: null @@ -17629,32 +17798,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 values: - dtype: 64 - length: 20000 - mode: serial - threads: 4 - run_ahead: 5 + dtype: 32 + m: 32 + n: 64 + p: 64 + mode: nondeterministic + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 380 + - '64' + - '32' 
+ input_idx: 108 common: repetitions: 2 timeout: null @@ -17663,32 +17836,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-10-threads-8 values: - dtype: 64 - length: 20000 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + dtype: 32 + m: 32 + n: 64 + p: 64 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 381 + - '64' + - '32' + input_idx: 109 common: repetitions: 2 timeout: null @@ -17697,32 +17874,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 64 + p: 128 mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 382 + - '128' + - '32' + input_idx: 110 common: repetitions: 2 timeout: null @@ -17731,32 +17912,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: 
vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 64 + p: 128 mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '20000' + - '32' - '64' - input_idx: 383 + - '128' + - '32' + input_idx: 111 common: repetitions: 2 timeout: null @@ -17765,32 +17950,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 20000 - mode: deterministic + dtype: 32 + m: 32 + n: 64 + p: 128 + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 384 + - '128' + - '32' + input_idx: 112 common: repetitions: 2 timeout: null @@ -17799,32 +17988,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: 
!Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-112-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-112-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 20000 - mode: 
deterministic + dtype: 32 + m: 32 + n: 64 + p: 128 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 385 + - '128' + - '32' + input_idx: 113 common: repetitions: 2 timeout: null @@ -17833,32 +18026,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 64 + p: 128 mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 386 + - '128' + - '32' + input_idx: 114 common: repetitions: 2 timeout: null @@ -17867,32 +18064,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: 
/home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 64 + p: 128 mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 387 + - '128' + - '32' + input_idx: 115 common: repetitions: 2 timeout: null @@ -17901,32 +18102,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 64 + p: 128 mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 388 + - '128' + - '32' + input_idx: 116 common: repetitions: 2 timeout: null @@ -17935,32 +18140,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - 
traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 64 + p: 128 mode: deterministic - threads: 4 + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + 
memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 389 + - '128' + - '32' + input_idx: 117 common: repetitions: 2 timeout: null @@ -17969,32 +18178,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 64 + p: 128 mode: deterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 390 + - '128' + - '32' + input_idx: 118 common: repetitions: 2 timeout: null @@ -18003,32 +18216,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-8 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 64 + p: 128 mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 391 + - '128' + - '32' + input_idx: 119 common: repetitions: 2 timeout: null @@ -18037,32 +18254,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 20000 - mode: deterministic - threads: 8 + dtype: 32 + m: 32 + n: 64 + p: 128 + mode: nondeterministic + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 392 + - '128' + - '32' + input_idx: 120 common: repetitions: 2 timeout: null @@ -18071,32 +18292,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 20000 - mode: deterministic - threads: 8 + dtype: 32 + m: 32 + n: 64 + p: 128 + mode: nondeterministic + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + 
cores_per_cluster: 4 + memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 393 + - '128' + - '32' + input_idx: 121 common: repetitions: 2 timeout: null @@ -18105,32 +18330,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 20000 - mode: deterministic - threads: 8 + dtype: 32 + m: 32 + n: 64 + p: 128 + mode: nondeterministic + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 394 + - '128' + - '32' + input_idx: 122 common: repetitions: 2 timeout: null @@ -18139,32 +18368,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 values: - dtype: 64 - length: 20000 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 + dtype: 32 + m: 32 + n: 64 + p: 128 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 395 + - '128' + - '32' + input_idx: 123 common: repetitions: 2 timeout: null @@ -18173,32 +18406,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 64 + p: 128 mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 396 + - '128' + - '32' + input_idx: 124 common: repetitions: 2 timeout: null @@ -18207,32 +18444,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-10-threads-4 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 64 + p: 128 mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 28 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: 
true + memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 397 + - '128' + - '32' + input_idx: 125 common: repetitions: 2 timeout: null @@ -18241,32 +18482,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 64 + p: 128 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 398 + - '128' + - '32' + input_idx: 126 common: repetitions: 2 timeout: null @@ -18275,32 +18520,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 64 + p: 128 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 399 + - '128' + - '32' + input_idx: 127 common: repetitions: 2 timeout: null @@ -18309,32 +18558,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-8 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 64 + p: 128 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 400 + - '128' + - '32' + input_idx: 128 common: repetitions: 2 timeout: null @@ -18343,32 +18596,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace 
- accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 64 + p: 128 mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 + threads: 8 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '20000' + - '32' - '64' - 
input_idx: 401 + - '128' + - '32' + input_idx: 129 common: repetitions: 2 timeout: null @@ -18377,32 +18634,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + 
results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 64 + p: 128 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 402 + - '128' + - '32' + input_idx: 130 common: repetitions: 2 timeout: null @@ -18411,32 +18672,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-10-threads-8 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 64 + p: 128 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '20000' + - '32' - '64' - input_idx: 403 + - '128' + - '32' + input_idx: 131 common: repetitions: 2 timeout: null @@ -18445,32 +18710,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 20000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 128 + p: 32 + mode: serial threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '20000' - - '64' - input_idx: 404 + - '32' + - '128' + - '32' + - '32' + input_idx: 132 common: repetitions: 2 timeout: null @@ -18479,32 +18748,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 20000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 128 + p: 32 + mode: serial threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: true args: - - 
'20000' - - '64' - input_idx: 405 + - '32' + - '128' + - '32' + - '32' + input_idx: 133 common: repetitions: 2 timeout: null @@ -18513,32 +18786,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 20000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 128 + p: 32 + mode: serial threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '20000' - - '64' - input_idx: 406 + - '32' + - '128' + - '32' + - '32' + input_idx: 134 common: repetitions: 2 timeout: null @@ -18547,32 +18824,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-112-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-112-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 20000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 128 + p: 32 + mode: serial threads: 4 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '20000' - - '64' - input_idx: 407 + - '32' + - '128' + - '32' + - '32' + input_idx: 135 common: repetitions: 2 timeout: null @@ -18581,32 +18862,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 20000 - mode: nondeterministic - threads: 8 + dtype: 32 + m: 32 + n: 128 + p: 32 + mode: deterministic + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '20000' - - '64' - input_idx: 408 + - '32' + - '128' + - '32' + - '32' + input_idx: 136 common: repetitions: 2 timeout: null @@ -18615,32 +18900,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 20000 - mode: nondeterministic - threads: 8 + dtype: 32 + m: 32 + n: 128 + p: 32 + mode: deterministic + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + 
cores_per_cluster: 4 + memory_only: false args: - - '20000' - - '64' - input_idx: 409 + - '32' + - '128' + - '32' + - '32' + input_idx: 137 common: repetitions: 2 timeout: null @@ -18649,32 +18938,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul 
+ executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 20000 - mode: nondeterministic - threads: 8 + dtype: 32 + m: 32 + n: 128 + p: 32 + mode: deterministic + threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '20000' - - '64' - input_idx: 410 + - '32' + - '128' + - '32' + - '32' + input_idx: 138 common: repetitions: 2 timeout: null @@ -18683,32 +18976,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: 
/home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 20000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 128 + p: 32 + mode: deterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '20000' - - '64' - input_idx: 411 + - '32' + - '128' + - '32' + - '32' + input_idx: 139 common: repetitions: 2 timeout: null @@ -18717,32 +19014,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 20000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 128 + p: 32 + mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '20000' - - '64' - input_idx: 412 + - '32' + - '128' + - '32' + - '32' + input_idx: 140 common: repetitions: 2 timeout: null @@ -18751,32 +19052,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 20000 - 
mode: nondeterministic + dtype: 32 + m: 32 + n: 128 + p: 32 + mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '20000' - - '64' - input_idx: 413 + - '32' + - '128' + - '32' + - '32' + input_idx: 141 common: repetitions: 2 timeout: null @@ -18785,32 +19090,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 128 + p: 32 mode: nondeterministic - threads: 8 - run_ahead: 10 + threads: 4 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '20000' - - '64' - input_idx: 414 + - '32' + - '128' + - '32' + - '32' + input_idx: 142 common: repetitions: 2 timeout: null @@ -18819,32 +19128,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: 
vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 128 + p: 32 mode: nondeterministic - threads: 8 - run_ahead: 10 + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '20000' - - '64' - input_idx: 415 + - '32' + - '128' + - '32' + - '32' + input_idx: 143 common: repetitions: 2 timeout: null @@ -18853,32 +19166,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 128 + p: 32 mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 + threads: 4 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '20000' - - '64' - input_idx: 416 + - '32' + - '128' + - '32' + - '32' + input_idx: 144 common: repetitions: 2 timeout: null @@ -18887,32 
+19204,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 128 + p: 32 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '20000' - - '64' - input_idx: 417 + - '32' + - '128' + - '32' + - '32' + input_idx: 145 common: repetitions: 2 timeout: null @@ -18921,32 +19242,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 128 + p: 32 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '20000' - - '64' - input_idx: 418 + - '32' + - '128' + - '32' + - '32' + input_idx: 146 common: repetitions: 2 timeout: null @@ -18955,32 +19280,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-10-threads-4 values: - dtype: 64 - length: 20000 + dtype: 32 + m: 32 + n: 128 + p: 32 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 10 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '20000' - - '64' - input_idx: 419 + - '32' + - '128' + - '32' + - '32' + input_idx: 147 common: repetitions: 2 timeout: null @@ -18989,32 +19318,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-20000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-20000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 100000 - mode: serial - threads: 4 + dtype: 32 + m: 32 + n: 128 + p: 32 + mode: nondeterministic + threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '100000' - - 
'64' - input_idx: 420 + - '32' + - '128' + - '32' + - '32' + input_idx: 148 common: repetitions: 2 timeout: null @@ -19023,32 +19356,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + 
results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 100000 - mode: serial - threads: 4 + dtype: 32 + m: 32 + n: 128 + p: 32 + mode: nondeterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '100000' - - '64' - input_idx: 421 + - '32' + - '128' + - '32' + - '32' + input_idx: 149 common: repetitions: 2 timeout: null @@ -19057,32 +19394,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 100000 - mode: serial - threads: 4 + dtype: 32 + m: 32 + n: 128 + p: 32 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '100000' - - '64' - input_idx: 422 + - '32' + - '128' + - '32' + - '32' + input_idx: 150 common: repetitions: 2 timeout: null @@ -19091,32 +19432,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 values: - dtype: 64 - length: 100000 - mode: serial - threads: 4 - run_ahead: 5 + dtype: 32 + m: 32 + n: 128 + p: 32 + mode: nondeterministic + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '100000' - - '64' - input_idx: 423 + - '32' + - '128' + - '32' + - '32' + input_idx: 151 common: repetitions: 2 timeout: null @@ -19125,32 +19470,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 values: - dtype: 64 - length: 100000 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + dtype: 32 + m: 32 + n: 128 + p: 32 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 28 
+ cores_per_cluster: 4 memory_only: false args: - - '100000' - - '64' - input_idx: 424 + - '32' + - '128' + - '32' + - '32' + input_idx: 152 common: repetitions: 2 timeout: null @@ -19159,32 +19508,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-10-threads-8 values: - dtype: 64 - length: 100000 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 + dtype: 32 + m: 32 + n: 128 + p: 32 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100000' - - '64' - input_idx: 425 + - '32' + - '128' + - '32' + - '32' + input_idx: 153 common: repetitions: 2 timeout: null @@ -19193,32 +19546,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: 
/home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 100000 - mode: deterministic + dtype: 32 + m: 32 + n: 128 + p: 64 + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '100000' + - '32' + - '128' - '64' - input_idx: 426 + - '32' + input_idx: 154 common: repetitions: 2 timeout: null @@ -19227,32 +19584,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 100000 - mode: deterministic + dtype: 32 + m: 32 + n: 128 + p: 64 + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '100000' + - '32' + - '128' - '64' - input_idx: 427 + - '32' + input_idx: 155 common: repetitions: 2 timeout: null @@ -19261,32 +19622,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 100000 - mode: deterministic + dtype: 32 + m: 32 + n: 128 + p: 64 + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '100000' + - '32' + - '128' - '64' - input_idx: 428 + - '32' + input_idx: 156 common: repetitions: 2 timeout: null @@ 
-19295,32 +19660,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-112-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-112-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 100000 - mode: deterministic + dtype: 32 + m: 32 + n: 128 + p: 64 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '100000' + - '32' + - '128' - '64' - input_idx: 429 + - '32' + input_idx: 157 common: repetitions: 2 timeout: null @@ -19329,32 +19698,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 100000 + dtype: 32 + m: 32 + n: 128 + p: 64 mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '100000' + - '32' + - '128' - '64' - input_idx: 430 + - '32' + input_idx: 158 common: repetitions: 2 timeout: null @@ -19363,32 +19736,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 100000 + dtype: 32 + m: 32 + n: 128 + p: 64 mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '100000' + - '32' + - '128' - '64' - input_idx: 431 + - '32' + input_idx: 159 common: repetitions: 2 timeout: null @@ -19397,32 +19774,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 100000 + dtype: 32 + m: 32 + n: 128 + p: 64 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - 
'100000' + - '32' + - '128' - '64' - input_idx: 432 + - '32' + input_idx: 160 common: repetitions: 2 timeout: null @@ -19431,32 +19812,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 100000 + dtype: 32 + m: 32 + n: 128 + p: 64 mode: deterministic threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100000' + - '32' + - '128' - '64' - input_idx: 433 + - '32' + input_idx: 161 common: repetitions: 2 timeout: null @@ -19465,32 +19850,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 100000 + dtype: 32 + m: 32 + n: 128 + p: 64 mode: deterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '100000' + - '32' + - '128' - '64' - input_idx: 434 + - '32' + input_idx: 162 common: repetitions: 2 timeout: null @@ -19499,32 +19888,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 100000 + dtype: 32 + m: 32 + n: 128 + p: 64 mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '100000' + - '32' + - '128' - '64' - input_idx: 435 + - '32' + input_idx: 163 common: repetitions: 2 timeout: null @@ -19533,32 +19926,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 100000 - mode: deterministic - threads: 8 + dtype: 32 + m: 32 + n: 128 + p: 64 + mode: nondeterministic + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 
memory_only: false args: - - '100000' + - '32' + - '128' - '64' - input_idx: 436 + - '32' + input_idx: 164 common: repetitions: 2 timeout: null @@ -19567,32 +19964,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 100000 - mode: deterministic - threads: 8 + dtype: 32 + m: 32 + n: 128 + p: 64 + mode: nondeterministic + threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '100000' + - '32' + - '128' - '64' - input_idx: 437 + - '32' + input_idx: 165 common: repetitions: 2 timeout: null @@ -19601,32 +20002,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: 
/home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 100000 + dtype: 32 + m: 32 + n: 128 + p: 64 mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '100000' + - '32' + - '128' - '64' - input_idx: 438 + - '32' + input_idx: 166 common: repetitions: 2 timeout: null @@ -19635,32 +20040,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + 
accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 values: - dtype: 64 - length: 100000 + dtype: 32 + m: 32 + n: 128 + p: 64 mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100000' + - '32' + - '128' - '64' - input_idx: 439 + - '32' + input_idx: 167 common: repetitions: 2 timeout: null @@ -19669,32 +20078,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 values: - dtype: 64 - length: 100000 + dtype: 32 + m: 32 + n: 128 + p: 64 mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 
memory_only: false args: - - '100000' + - '32' + - '128' - '64' - input_idx: 440 + - '32' + input_idx: 168 common: repetitions: 2 timeout: null @@ -19703,32 +20116,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-10-threads-4 values: - dtype: 64 - length: 100000 + dtype: 32 + m: 32 + n: 128 + p: 64 mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '100000' + - '32' + - '128' - '64' - input_idx: 441 + - '32' + input_idx: 169 common: repetitions: 2 timeout: null @@ -19737,32 +20154,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - 
results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 100000 + dtype: 32 + m: 32 + n: 128 + p: 64 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '100000' + - '32' + - '128' - '64' - input_idx: 442 + - '32' + input_idx: 170 common: repetitions: 2 timeout: null @@ -19771,32 +20192,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 100000 + dtype: 32 + m: 32 + n: 128 + p: 64 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '100000' + - '32' + - '128' - '64' - input_idx: 443 + - '32' + input_idx: 171 common: repetitions: 2 timeout: null @@ -19805,32 +20230,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 100000 + dtype: 32 + m: 32 + n: 128 + p: 64 mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 + threads: 8 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 
1 memory_only: false args: - - '100000' + - '32' + - '128' - '64' - input_idx: 444 + - '32' + input_idx: 172 common: repetitions: 2 timeout: null @@ -19839,32 +20268,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 values: - dtype: 64 - length: 100000 - mode: nondeterministic - threads: 4 + dtype: 32 + m: 32 + n: 128 + p: 64 + mode: nondeterministic + threads: 8 run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100000' + - '32' + - '128' - '64' - input_idx: 445 + - '32' + input_idx: 173 common: repetitions: 2 timeout: null @@ -19873,32 +20306,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - 
results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 values: - dtype: 64 - length: 100000 + dtype: 32 + m: 32 + n: 128 + p: 64 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '100000' + - '32' + - '128' - '64' - input_idx: 446 + - '32' + input_idx: 174 common: repetitions: 2 timeout: null @@ -19907,32 +20344,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-10-threads-8 values: - dtype: 64 - length: 100000 + dtype: 32 + m: 32 + n: 128 + p: 64 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '100000' + - '32' + - '128' - '64' - input_idx: 447 + - '32' + input_idx: 175 common: repetitions: 2 timeout: null @@ -19941,32 +20382,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 100000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 128 + p: 128 + mode: serial threads: 4 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 memory_only: 
false args: - - '100000' - - '64' - input_idx: 448 + - '32' + - '128' + - '128' + - '32' + input_idx: 176 common: repetitions: 2 timeout: null @@ -19975,32 +20420,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 100000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 128 + p: 128 + mode: serial threads: 4 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '100000' - - '64' - input_idx: 449 + - '32' + - '128' + - '128' + - '32' + input_idx: 177 common: repetitions: 2 timeout: null @@ -20009,32 +20458,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 100000 - mode: nondeterministic - threads: 8 + dtype: 32 + m: 32 + n: 128 + p: 128 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '100000' - - '64' - input_idx: 450 + - '32' + - '128' + - '128' + - '32' + input_idx: 178 common: repetitions: 2 timeout: null @@ -20043,32 +20496,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-112-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-112-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 100000 - mode: nondeterministic - threads: 8 + dtype: 32 + m: 32 + n: 128 + p: 128 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100000' - - '64' - input_idx: 451 + - '32' + - '128' + - '128' + - '32' + input_idx: 179 common: repetitions: 2 timeout: null @@ -20077,32 +20534,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 100000 - mode: nondeterministic - threads: 8 + dtype: 32 + m: 32 + n: 128 + p: 128 + mode: deterministic + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 
memory_only: false args: - - '100000' - - '64' - input_idx: 452 + - '32' + - '128' + - '128' + - '32' + input_idx: 180 common: repetitions: 2 timeout: null @@ -20111,32 +20572,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 100000 - mode: nondeterministic - threads: 8 + dtype: 32 + m: 32 + n: 128 + p: 128 + mode: deterministic + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '100000' - - '64' - input_idx: 453 + - '32' + - '128' + - '128' + - '32' + input_idx: 181 common: repetitions: 2 timeout: null @@ -20145,32 +20610,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: 
/home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 100000 - mode: nondeterministic - threads: 8 + dtype: 32 + m: 32 + n: 128 + p: 128 + mode: deterministic + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '100000' - - '64' - input_idx: 454 + - '32' + - '128' + - '128' + - '32' + input_idx: 182 common: repetitions: 2 timeout: null @@ -20179,32 +20648,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 values: - dtype: 64 - length: 100000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 128 + p: 128 + mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100000' - - '64' - input_idx: 455 + - '32' + - '128' + - '128' + - '32' + input_idx: 183 common: repetitions: 2 timeout: null @@ -20213,32 +20686,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 values: - dtype: 64 - 
length: 100000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 128 + p: 128 + mode: deterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '100000' - - '64' - input_idx: 456 + - '32' + - '128' + - '128' + - '32' + input_idx: 184 common: repetitions: 2 timeout: null @@ -20247,32 +20724,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-8 values: - dtype: 64 - length: 100000 - mode: nondeterministic + dtype: 32 + m: 32 + n: 128 + p: 128 + mode: deterministic threads: 8 - run_ahead: 10 - num_clusters: 28 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100000' - - '64' - input_idx: 457 + - '32' + - '128' + - '128' + - '32' + input_idx: 185 common: repetitions: 2 timeout: null @@ -20281,32 +20762,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: 
vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 100000 + dtype: 32 + m: 32 + n: 128 + p: 128 mode: nondeterministic - threads: 8 - run_ahead: 10 + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '100000' - - '64' - input_idx: 458 + - '32' + - '128' + - '128' + - '32' + input_idx: 186 common: repetitions: 2 timeout: null @@ -20315,32 +20800,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 100000 + dtype: 32 + m: 32 + n: 128 + p: 128 mode: nondeterministic - threads: 8 - run_ahead: 10 + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '100000' - - '64' - input_idx: 459 + - '32' + - '128' + - '128' + - '32' + input_idx: 187 common: repetitions: 2 timeout: null @@ -20349,32 
+20838,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-4 values: - dtype: 64 - length: 100000 + dtype: 32 + m: 32 + n: 128 + p: 128 mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 + threads: 4 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '100000' - - '64' - input_idx: 460 + - '32' + - '128' + - '128' + - '32' + input_idx: 188 common: repetitions: 2 timeout: null @@ -20383,32 +20876,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 values: - dtype: 64 - length: 100000 + dtype: 32 + m: 32 + n: 128 + p: 128 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 10 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '100000' - - '64' - input_idx: 461 + - '32' + - '128' + - '128' + - '32' + input_idx: 189 common: repetitions: 2 timeout: null @@ -20417,32 +20914,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-100000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-100000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 values: - dtype: 64 - length: 500000 - mode: serial + dtype: 32 + m: 32 + n: 128 + p: 128 + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '500000' - - '64' - input_idx: 462 + - '32' + - '128' + - '128' + - '32' + input_idx: 190 common: repetitions: 2 timeout: null @@ -20451,32 +20952,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-10-threads-4 values: - dtype: 64 - length: 500000 - mode: serial + dtype: 32 + m: 32 + n: 128 + p: 128 + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 28 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - 
memory_only: true + memory_only: false args: - - '500000' - - '64' - input_idx: 463 + - '32' + - '128' + - '128' + - '32' + input_idx: 191 common: repetitions: 2 timeout: null @@ -20485,32 +20990,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 values: - dtype: 64 - length: 500000 - mode: serial - threads: 4 + dtype: 32 + m: 32 + n: 128 + p: 128 + mode: nondeterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '500000' - - '64' - input_idx: 464 + - '32' + - '128' + - '128' + - '32' + input_idx: 192 common: repetitions: 2 timeout: null @@ -20519,32 +21028,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 values: - dtype: 64 - length: 500000 - mode: serial - threads: 4 + dtype: 32 + m: 32 + n: 128 + p: 128 + mode: nondeterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '500000' - - '64' - input_idx: 465 + - '32' + - '128' + - '128' + - '32' + input_idx: 193 common: repetitions: 2 timeout: null @@ -20553,32 +21066,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-8 values: - dtype: 64 - length: 500000 - mode: serial - threads: 4 + dtype: 32 + m: 32 + n: 128 + p: 128 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '500000' - - '64' - input_idx: 466 + - '32' + - '128' + - '128' + - '32' + input_idx: 194 common: repetitions: 2 timeout: null @@ -20587,32 +21104,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 values: - dtype: 64 - length: 500000 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 + dtype: 32 + m: 32 + n: 128 + p: 128 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 
1 - memory_only: true + memory_only: false args: - - '500000' - - '64' - input_idx: 467 + - '32' + - '128' + - '128' + - '32' + input_idx: 195 common: repetitions: 2 timeout: null @@ -20621,32 +21142,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: 
matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 values: - dtype: 64 - length: 500000 - mode: deterministic - threads: 4 - run_ahead: 5 + dtype: 32 + m: 32 + n: 128 + p: 128 + mode: nondeterministic + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '500000' - - '64' - input_idx: 468 + - '32' + - '128' + - '128' + - '32' + input_idx: 196 common: repetitions: 2 timeout: null @@ -20655,32 +21180,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: 
/home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-10-threads-8 values: - dtype: 64 - length: 500000 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 + dtype: 32 + m: 32 + n: 128 + p: 128 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '500000' - - '64' - input_idx: 469 + - '32' + - '128' + - '128' + - '32' + input_idx: 197 common: repetitions: 2 timeout: null @@ -20689,32 +21218,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-10-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 500000 - mode: deterministic + dtype: 32 + m: 64 + n: 32 + p: 32 + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '500000' - '64' - input_idx: 470 + - '32' + - '32' + - '32' + input_idx: 198 common: repetitions: 2 timeout: null @@ -20723,32 +21256,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate 
- traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 500000 - mode: deterministic + dtype: 32 + m: 64 + n: 32 + p: 32 + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: true args: - - '500000' - '64' - 
input_idx: 471 + - '32' + - '32' + - '32' + input_idx: 199 common: repetitions: 2 timeout: null @@ -20757,32 +21294,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 500000 - mode: deterministic + dtype: 32 + m: 64 + n: 32 + p: 32 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '500000' - '64' - input_idx: 472 + - '32' + - '32' + - '32' + input_idx: 200 common: repetitions: 2 timeout: null @@ -20791,32 +21332,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-112-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-112-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 500000 - mode: deterministic - threads: 4 + dtype: 32 + m: 64 + n: 32 + p: 32 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '500000' - '64' - input_idx: 473 + - '32' + - '32' + - '32' + input_idx: 201 common: repetitions: 2 timeout: null @@ -20825,32 +21370,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 500000 + dtype: 32 + m: 64 + n: 32 + p: 32 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '500000' - '64' - input_idx: 474 + - '32' + - '32' + - '32' + input_idx: 202 common: repetitions: 2 timeout: null @@ -20859,32 +21408,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 500000 + dtype: 32 + m: 64 + n: 32 + p: 32 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '500000' - '64' - input_idx: 475 + - '32' + - '32' + - '32' + input_idx: 203 common: 
repetitions: 2 timeout: null @@ -20893,32 +21446,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 500000 + dtype: 32 + m: 64 + n: 32 + p: 32 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '500000' - '64' - input_idx: 476 + - '32' + - '32' + - '32' + input_idx: 204 common: repetitions: 2 timeout: null @@ -20927,32 +21484,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 500000 + dtype: 32 + m: 64 + n: 32 + p: 32 mode: deterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '500000' - '64' - input_idx: 477 + - '32' + - '32' + - '32' + input_idx: 205 common: repetitions: 2 timeout: null @@ -20961,32 +21522,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 500000 + dtype: 32 + m: 64 + n: 32 + p: 32 mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '500000' - '64' - input_idx: 478 + - '32' + - '32' + - '32' + input_idx: 206 common: repetitions: 2 timeout: null @@ -20995,32 +21560,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 500000 + dtype: 32 + m: 64 + n: 32 + p: 32 mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '500000' - '64' - input_idx: 479 + - '32' + - '32' + - '32' + input_idx: 207 common: 
repetitions: 2 timeout: null @@ -21029,22 +21598,24 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 500000 + dtype: 32 + m: 64 + n: 32 + p: 32 mode: nondeterministic threads: 4 run_ahead: 5 @@ -21052,9 +21623,11 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '500000' - '64' - input_idx: 480 + - '32' + - '32' + - '32' + input_idx: 208 common: repetitions: 2 timeout: null @@ -21063,32 +21636,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 500000 + dtype: 32 + m: 64 + n: 32 + p: 32 mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '500000' - '64' - input_idx: 481 + - '32' + - '32' + - '32' + input_idx: 209 common: repetitions: 2 timeout: null @@ -21097,32 +21674,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-4 values: - dtype: 64 - length: 500000 + dtype: 32 + m: 64 + n: 32 + p: 32 mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '500000' - '64' - input_idx: 482 + - '32' + - '32' + - '32' + input_idx: 210 common: repetitions: 2 timeout: null @@ -21131,32 +21712,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 values: - dtype: 64 - length: 500000 + dtype: 32 + m: 64 + n: 32 + p: 32 mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 
1 + memory_only: false args: - - '500000' - '64' - input_idx: 483 + - '32' + - '32' + - '32' + input_idx: 211 common: repetitions: 2 timeout: null @@ -21165,32 +21750,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 values: - dtype: 64 - length: 500000 + dtype: 32 + m: 64 + n: 32 + p: 32 mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + run_ahead: 10 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '500000' - '64' - input_idx: 484 + - '32' + - '32' + - '32' + input_idx: 212 common: repetitions: 2 timeout: null @@ -21199,32 +21788,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-10-threads-4 values: - dtype: 64 - length: 500000 + dtype: 32 + m: 64 + n: 32 + p: 32 mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '500000' - '64' - input_idx: 485 + - '32' + - '32' + - '32' + input_idx: 213 common: repetitions: 2 timeout: null @@ -21233,32 +21826,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 500000 + dtype: 32 + m: 64 + n: 32 + p: 32 mode: nondeterministic - threads: 4 - run_ahead: 10 + threads: 8 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '500000' - '64' - input_idx: 486 + - '32' + - '32' + - '32' + input_idx: 214 common: repetitions: 2 timeout: null @@ -21267,32 +21864,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 500000 + dtype: 32 + m: 64 + n: 32 + p: 32 mode: nondeterministic - threads: 4 - run_ahead: 10 + threads: 8 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '500000' - '64' - input_idx: 487 
+ - '32' + - '32' + - '32' + input_idx: 215 common: repetitions: 2 timeout: null @@ -21301,32 +21902,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + 
results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-8 values: - dtype: 64 - length: 500000 + dtype: 32 + m: 64 + n: 32 + p: 32 mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 + threads: 8 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '500000' - '64' - input_idx: 488 + - '32' + - '32' + - '32' + input_idx: 216 common: repetitions: 2 timeout: null @@ -21335,32 +21940,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 values: - dtype: 64 - length: 500000 + dtype: 32 + m: 64 + n: 32 + p: 32 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '500000' - '64' - input_idx: 489 + - '32' + - '32' + - '32' + input_idx: 217 common: repetitions: 2 timeout: null @@ -21369,32 +21978,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 values: - dtype: 64 - length: 500000 + dtype: 32 + m: 64 + n: 32 + p: 32 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '500000' - '64' - input_idx: 490 + - '32' + - '32' + - '32' + input_idx: 218 common: repetitions: 2 timeout: null @@ -21403,32 +22016,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-10-threads-8 values: - dtype: 64 - length: 500000 + dtype: 32 + m: 64 + n: 32 + p: 32 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true 
+ memory_only: false args: - - '500000' - '64' - input_idx: 491 + - '32' + - '32' + - '32' + input_idx: 219 common: repetitions: 2 timeout: null @@ -21437,32 +22054,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 500000 - mode: nondeterministic - threads: 8 + dtype: 32 + m: 64 + n: 32 + p: 64 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '500000' - '64' - input_idx: 492 + - '32' + - '64' + - '32' + input_idx: 220 common: repetitions: 2 timeout: null @@ -21471,32 +22092,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 500000 - mode: nondeterministic - threads: 8 + dtype: 32 + m: 64 + n: 32 + p: 64 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '500000' - '64' - input_idx: 493 + - '32' + - '64' + - '32' + input_idx: 221 common: repetitions: 2 timeout: null @@ -21505,32 +22130,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace 
parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 500000 - mode: nondeterministic - threads: 8 + dtype: 32 + m: 64 + n: 32 + p: 64 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '500000' - '64' - input_idx: 494 + - '32' + - '64' + - '32' + input_idx: 222 common: repetitions: 2 timeout: null @@ -21539,32 +22168,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-112-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-112-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 500000 - mode: nondeterministic - threads: 8 + dtype: 32 + m: 64 + n: 32 + p: 64 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '500000' - '64' - input_idx: 495 + - '32' + - '64' + - '32' + 
input_idx: 223 common: repetitions: 2 timeout: null @@ -21573,32 +22206,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 500000 - mode: nondeterministic - threads: 8 + dtype: 32 + m: 64 + n: 32 + p: 64 + mode: deterministic + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '500000' - '64' - input_idx: 496 + - '32' + - '64' + - '32' + input_idx: 224 common: repetitions: 2 timeout: null @@ -21607,32 +22244,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 500000 - mode: nondeterministic - threads: 8 + dtype: 32 + m: 64 + n: 32 + p: 64 + mode: deterministic + threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '500000' - '64' - input_idx: 497 + - '32' + - '64' + - '32' + input_idx: 225 common: repetitions: 2 timeout: null @@ -21641,32 +22282,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 500000 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 + dtype: 32 + m: 64 + n: 32 + p: 64 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '500000' - '64' - input_idx: 498 + - '32' + - '64' + - '32' + input_idx: 226 common: repetitions: 2 timeout: null @@ -21675,32 +22320,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 500000 - mode: nondeterministic + dtype: 32 + m: 64 + n: 32 + p: 64 + mode: deterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + 
memory_only: false args: - - '500000' - '64' - input_idx: 499 + - '32' + - '64' + - '32' + input_idx: 227 common: repetitions: 2 timeout: null @@ -21709,32 +22358,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 500000 - mode: nondeterministic + dtype: 32 + m: 64 + n: 32 + p: 64 + mode: deterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '500000' - '64' - input_idx: 500 + - '32' + - '64' + - '32' + input_idx: 228 common: repetitions: 2 timeout: null @@ -21743,32 +22396,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-8 values: - dtype: 64 - length: 500000 - mode: nondeterministic + dtype: 32 + m: 64 + n: 32 + p: 64 + mode: deterministic threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '500000' - '64' - input_idx: 501 + - '32' + - '64' + - '32' + input_idx: 229 common: repetitions: 2 timeout: null @@ -21777,32 +22434,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-8-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 500000 + dtype: 32 + m: 64 + n: 32 + p: 64 mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 + threads: 4 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '500000' - '64' - input_idx: 502 + - '32' + - '64' + - '32' + input_idx: 230 common: repetitions: 2 timeout: null @@ -21811,32 +22472,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - - name: vectorAdd - benchmark_idx: 0 - uid: vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/vectoradd - rel_path: ./vectoradd - executable: vectoradd_l1_enabled - executable_path: /home/roman/dev/box/test-apps/vectoradd/vectoradd_l1_enabled - results_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8 + - name: simple_matrixmul + benchmark_idx: 1 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/simple_matrixmul + rel_path: ./simple_matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: - dtype: 64 - length: 500000 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + dtype: 32 + m: 64 + n: 32 + p: 64 + mode: nondeterministic + 
threads: 4 + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '500000' - '64' - input_idx: 503 + - '32' + - '64' + - '32' + input_idx: 231 common: repetitions: 2 timeout: null @@ -21845,37 +22510,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-cores_per_cluster-1-dtype-64-length-500000-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/trace - accelsim_traces_dir: /home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-64-length-500000/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - simple_matrixmul: - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 - mode: serial + p: 64 + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: + - '64' - '32' + - '64' - '32' - - '32' - - '32' - input_idx: 0 + input_idx: 232 common: repetitions: 2 timeout: null @@ -21884,36 +22548,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 - mode: serial + p: 64 + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - '64' - '32' + - '64' - '32' - - '32' - - '32' - input_idx: 1 + input_idx: 233 common: repetitions: 2 timeout: null @@ -21922,36 +22586,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: 
simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 - mode: serial + p: 64 + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: + - '64' - '32' + - '64' - '32' - - '32' - - '32' - input_idx: 2 + input_idx: 234 common: repetitions: 2 timeout: null @@ -21960,36 +22624,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-10-threads-4 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 - mode: serial + p: 64 + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: + - '64' - '32' + - '64' - '32' - - '32' - - '32' - input_idx: 3 + input_idx: 235 common: repetitions: 2 timeout: null @@ -21998,36 +22662,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 - mode: serial - threads: 4 + p: 64 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: + - '64' - '32' + - '64' - '32' - - '32' - - '32' - input_idx: 4 + input_idx: 236 common: repetitions: 2 timeout: null @@ -22036,36 +22700,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 - mode: serial - threads: 4 + p: 64 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + 
cores_per_cluster: 4 + memory_only: false args: + - '64' - '32' + - '64' - '32' - - '32' - - '32' - input_idx: 5 + input_idx: 237 common: repetitions: 2 timeout: null @@ -22074,36 +22738,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 - mode: deterministic - threads: 4 + p: 64 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: + - '64' - '32' + - '64' - '32' - - '32' - - '32' - input_idx: 6 + input_idx: 238 common: repetitions: 2 timeout: null @@ -22112,36 +22776,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 + p: 64 + mode: nondeterministic + threads: 8 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - '64' - '32' + - '64' - '32' - - '32' - - '32' - input_idx: 7 + input_idx: 239 common: repetitions: 2 timeout: null @@ -22150,36 +22814,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 + p: 64 + mode: nondeterministic + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: + - '64' - '32' + - '64' - '32' - - '32' - - '32' - input_idx: 8 + input_idx: 240 common: repetitions: 2 timeout: null @@ -22188,36 +22852,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-10-threads-8 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + p: 64 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: + - '64' - '32' + - '64' - '32' - - '32' - - '32' - input_idx: 9 + input_idx: 241 common: repetitions: 2 timeout: null @@ -22226,36 +22890,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 - mode: deterministic + p: 128 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: + - '64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 10 + input_idx: 242 common: repetitions: 2 timeout: null @@ -22264,36 +22928,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 - mode: deterministic + p: 128 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: true args: + - '64' - '32' + - '128' - '32' - 
- '32' - - '32' - input_idx: 11 + input_idx: 243 common: repetitions: 2 timeout: null @@ -22302,36 +22966,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 
32 - m: 32 + m: 64 n: 32 - p: 32 - mode: deterministic - threads: 8 + p: 128 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: + - '64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 12 + input_idx: 244 common: repetitions: 2 timeout: null @@ -22340,36 +23004,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-112-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-112-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 - mode: deterministic - threads: 8 + p: 128 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - '64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 13 + input_idx: 245 common: repetitions: 2 timeout: null @@ -22378,36 +23042,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 + p: 128 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: + - '64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 14 + input_idx: 246 common: repetitions: 2 timeout: null @@ -22416,36 +23080,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 + p: 128 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: + - '64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 15 + input_idx: 247 common: repetitions: 2 timeout: null @@ -22454,36 +23118,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 + p: 128 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: + - '64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 16 + input_idx: 248 common: repetitions: 2 timeout: null @@ -22492,36 +23156,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 + p: 128 mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - 
'64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 17 + input_idx: 249 common: repetitions: 2 timeout: null @@ -22530,36 +23194,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 - mode: nondeterministic - threads: 4 + p: 128 + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: + - '64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 18 + input_idx: 250 common: repetitions: 2 timeout: null @@ -22568,36 +23232,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 - mode: nondeterministic - threads: 4 + p: 128 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - '64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 19 + input_idx: 251 common: repetitions: 2 timeout: null @@ -22606,36 +23270,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 + p: 128 mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: + - '64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 20 + input_idx: 252 common: repetitions: 2 timeout: null @@ -22644,36 +23308,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 + p: 128 mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: + - '64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 21 + input_idx: 253 common: repetitions: 2 timeout: null @@ -22682,36 +23346,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 + p: 128 mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: + - '64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 22 + input_idx: 254 common: repetitions: 2 timeout: null @@ -22720,36 +23384,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 + p: 128 mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - 
memory_only: true + memory_only: false args: + - '64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 23 + input_idx: 255 common: repetitions: 2 timeout: null @@ -22758,36 +23422,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + parallel: null + l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 + p: 128 mode: nondeterministic threads: 4 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: + - '64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 24 + input_idx: 256 common: repetitions: 2 timeout: null @@ -22796,36 +23460,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-10-threads-4 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 + p: 128 mode: nondeterministic threads: 4 run_ahead: 10 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - '64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 25 + input_idx: 257 common: repetitions: 2 timeout: null @@ -22834,36 +23498,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 + p: 128 mode: nondeterministic - threads: 4 - run_ahead: 10 + threads: 8 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: + - '64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 26 + input_idx: 258 common: repetitions: 2 timeout: null @@ -22872,36 +23536,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 + p: 128 mode: nondeterministic - threads: 4 - run_ahead: 10 + threads: 8 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: + - '64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 27 + input_idx: 259 common: repetitions: 2 timeout: null @@ -22910,36 +23574,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 + p: 128 mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 + threads: 8 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: + - '64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 28 + input_idx: 260 common: repetitions: 2 timeout: null @@ -22948,36 +23612,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 + p: 128 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - 
memory_only: true + memory_only: false args: + - '64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 29 + input_idx: 261 common: repetitions: 2 timeout: null @@ -22986,36 +23650,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 + p: 128 mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: + - '64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 30 + input_idx: 262 common: repetitions: 2 timeout: null @@ -23024,36 +23688,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-10-threads-8 values: dtype: 32 - m: 32 + m: 64 n: 32 - p: 32 + p: 128 mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 28 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - '64' - '32' + - '128' - '32' - - '32' - - '32' - input_idx: 31 + input_idx: 263 common: repetitions: 2 timeout: null @@ -23062,36 +23726,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 32 - mode: nondeterministic - threads: 8 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: + - '64' + - '64' - '32' - '32' - - '32' - - '32' - input_idx: 32 + input_idx: 264 common: repetitions: 2 timeout: null @@ -23100,36 +23764,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 32 - mode: nondeterministic - threads: 8 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: true args: + - '64' + - '64' - '32' - '32' - - '32' - - '32' - input_idx: 33 + input_idx: 265 common: repetitions: 2 timeout: null @@ -23138,36 +23802,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 32 - mode: nondeterministic - threads: 8 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: + - '64' + - '64' - '32' - '32' - - '32' - - '32' - input_idx: 34 + input_idx: 266 common: repetitions: 2 timeout: null @@ -23176,36 +23840,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-112-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-112-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 32 - mode: nondeterministic - threads: 8 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + 
memory_only: false args: + - '64' + - '64' - '32' - '32' - - '32' - - '32' - input_idx: 35 + input_idx: 267 common: repetitions: 2 timeout: null @@ -23214,36 +23878,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 + mode: deterministic + threads: 4 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: + - '64' + - '64' - '32' - '32' - - '32' - - '32' - input_idx: 36 + input_idx: 268 common: repetitions: 2 timeout: null @@ -23252,36 +23916,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 + mode: deterministic + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: + - '64' + - '64' - '32' - '32' - - '32' - - '32' - input_idx: 37 + input_idx: 269 common: repetitions: 2 timeout: null @@ -23290,36 +23954,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: + - '64' + - '64' - '32' - '32' - - '32' - - '32' - input_idx: 38 + input_idx: 270 common: repetitions: 2 timeout: null @@ -23328,36 +23992,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 32 - mode: nondeterministic + mode: deterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: + - '64' + - '64' - '32' - '32' - - '32' - - '32' - input_idx: 39 + input_idx: 271 common: repetitions: 2 timeout: null @@ -23366,36 +24030,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 32 - mode: nondeterministic + mode: deterministic threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 + run_ahead: 5 + num_clusters: 28 
+ cores_per_cluster: 4 memory_only: false args: + - '64' + - '64' - '32' - '32' - - '32' - - '32' - input_idx: 40 + input_idx: 272 common: repetitions: 2 timeout: null @@ -23404,36 +24068,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 32 - mode: nondeterministic + mode: deterministic threads: 8 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - '64' + - '64' - '32' - '32' - - '32' - - '32' - input_idx: 41 + input_idx: 273 common: repetitions: 2 timeout: null @@ -23442,36 +24106,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: 
matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 64 - mode: serial + m: 64 + n: 64 + p: 32 + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - '64' - '64' - '32' - input_idx: 42 + - '32' + input_idx: 274 common: repetitions: 2 timeout: null @@ -23480,36 +24144,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: 
simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 64 - mode: serial + m: 64 + n: 64 + p: 32 + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '32' - - '32' + - '64' - '64' - '32' - input_idx: 43 + - '32' + input_idx: 275 common: repetitions: 2 timeout: null @@ -23518,36 +24182,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 64 - mode: serial + m: 64 + n: 64 + p: 32 + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - '64' - '64' - '32' - input_idx: 44 + - '32' + input_idx: 276 common: repetitions: 2 timeout: null @@ -23556,36 +24220,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 64 - mode: serial + m: 64 + n: 64 + p: 32 + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '32' - - '32' + - '64' - '64' - '32' - input_idx: 45 + - '32' + input_idx: 277 common: repetitions: 2 timeout: null @@ -23594,36 +24258,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 64 - mode: serial + m: 64 + n: 64 + p: 32 + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + run_ahead: 10 + num_clusters: 28 + 
cores_per_cluster: 4 memory_only: false args: - - '32' - - '32' + - '64' - '64' - '32' - input_idx: 46 + - '32' + input_idx: 278 common: repetitions: 2 timeout: null @@ -23632,36 +24296,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 64 - mode: serial + m: 64 + n: 64 + p: 32 + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - - '32' + - '64' - '64' - '32' - input_idx: 47 + - '32' + input_idx: 279 common: repetitions: 2 timeout: null @@ -23670,36 +24334,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul 
executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 - p: 64 - mode: deterministic - threads: 4 + m: 64 + n: 64 + p: 32 + mode: nondeterministic + threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - '64' - '64' - '32' - input_idx: 48 + - '32' + input_idx: 280 common: repetitions: 2 timeout: null @@ -23708,36 +24372,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 - p: 64 - mode: deterministic - threads: 4 + m: 64 + n: 64 + p: 32 + mode: nondeterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '32' - - '32' + - '64' - '64' - '32' - input_idx: 49 + - '32' + input_idx: 281 common: repetitions: 2 timeout: null @@ -23746,36 +24410,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + 
traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 - p: 64 - mode: deterministic - threads: 4 + m: 64 + n: 64 + p: 32 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - '64' - '64' - '32' - input_idx: 50 + - '32' + input_idx: 282 common: repetitions: 2 timeout: null @@ -23784,36 +24448,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 32 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 + m: 64 + n: 64 + p: 32 + mode: nondeterministic + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '32' - - '32' + - '64' - '64' - '32' - input_idx: 51 + - '32' + input_idx: 283 common: repetitions: 2 timeout: null @@ -23822,36 +24486,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 32 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + m: 64 + n: 64 + p: 32 + mode: 
nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '32' - - '32' + - '64' - '64' - '32' - input_idx: 52 + - '32' + input_idx: 284 common: repetitions: 2 timeout: null @@ -23860,36 +24524,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 32 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 + m: 64 + n: 64 + p: 32 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - - '32' + - '64' - '64' - '32' - input_idx: 53 + - '32' + input_idx: 285 common: repetitions: 2 timeout: null @@ -23898,36 +24562,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: 
./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 - mode: deterministic - threads: 8 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '64' - '32' - input_idx: 54 + input_idx: 286 common: repetitions: 2 timeout: null @@ -23936,36 +24600,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 - mode: deterministic - threads: 8 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '32' - - '32' + - '64' + - '64' - '64' - '32' - input_idx: 55 + input_idx: 287 common: repetitions: 2 timeout: null @@ -23974,36 +24638,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace 
+ accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 - mode: deterministic - threads: 8 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '64' - '32' - input_idx: 56 + input_idx: 288 common: repetitions: 2 timeout: null @@ -24012,36 +24676,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-112-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-112-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 - mode: deterministic - threads: 8 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '64' - '32' - input_idx: 57 + input_idx: 289 common: repetitions: 2 timeout: null @@ -24050,36 +24714,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - 
'64' + - '64' - '64' - '32' - input_idx: 58 + input_idx: 290 common: repetitions: 2 timeout: null @@ -24088,36 +24752,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '64' - '32' - input_idx: 59 + input_idx: 291 common: repetitions: 2 timeout: null @@ -24126,36 +24790,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: 
matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 - mode: nondeterministic + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '64' - '32' - input_idx: 60 + input_idx: 292 common: repetitions: 2 timeout: null @@ -24164,36 +24828,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 - mode: nondeterministic - threads: 4 + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '64' - '32' - input_idx: 61 + input_idx: 293 common: repetitions: 2 timeout: null @@ -24202,36 +24866,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 - mode: nondeterministic - threads: 4 + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '64' - '32' - input_idx: 62 + input_idx: 294 common: repetitions: 2 timeout: null @@ -24240,36 +24904,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 - mode: nondeterministic - threads: 4 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '64' - '32' - input_idx: 63 + input_idx: 295 common: repetitions: 2 timeout: null @@ -24278,36 +24942,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '32' - - 
'32' + - '64' + - '64' - '64' - '32' - input_idx: 64 + input_idx: 296 common: repetitions: 2 timeout: null @@ -24316,36 +24980,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '64' - '32' - input_idx: 65 + input_idx: 297 common: repetitions: 2 timeout: null @@ -24354,36 +25018,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: 
matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 mode: nondeterministic threads: 4 - run_ahead: 10 - num_clusters: 28 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '64' - '32' - input_idx: 66 + input_idx: 298 common: repetitions: 2 timeout: null @@ -24392,36 +25056,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 mode: nondeterministic threads: 4 run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '64' - '32' - input_idx: 67 + input_idx: 299 common: repetitions: 2 timeout: null @@ -24430,36 +25094,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 mode: nondeterministic threads: 4 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '64' - '32' - input_idx: 68 + input_idx: 300 common: repetitions: 2 timeout: null @@ -24468,36 +25132,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 mode: nondeterministic threads: 4 run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '64' - '32' - input_idx: 69 + input_idx: 301 common: repetitions: 2 timeout: null @@ -24506,36 +25170,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 + threads: 8 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 
memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '64' - '32' - input_idx: 70 + input_idx: 302 common: repetitions: 2 timeout: null @@ -24544,36 +25208,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + threads: 8 + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '64' - '32' - input_idx: 71 + input_idx: 303 common: repetitions: 2 timeout: null @@ -24582,36 +25246,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: 
./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '64' - '32' - input_idx: 72 + input_idx: 304 common: repetitions: 2 timeout: null @@ -24620,36 +25284,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '64' - '32' - input_idx: 73 + input_idx: 305 common: repetitions: 2 timeout: null @@ -24658,36 +25322,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '64' - '32' - input_idx: 74 + input_idx: 306 common: repetitions: 2 timeout: null @@ -24696,36 +25360,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 64 mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '64' - '32' - input_idx: 75 + input_idx: 307 common: repetitions: 2 timeout: null @@ -24734,36 +25398,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 + m: 64 + n: 64 + p: 128 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 
memory_only: false args: - - '32' - - '32' - '64' + - '64' + - '128' - '32' - input_idx: 76 + input_idx: 308 common: repetitions: 2 timeout: null @@ -24772,36 +25436,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 + m: 64 + n: 64 + p: 128 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '32' - - '32' - '64' + - '64' + - '128' - '32' - input_idx: 77 + input_idx: 309 common: repetitions: 2 timeout: null @@ -24810,36 +25474,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 + m: 64 + n: 64 + p: 128 + mode: serial + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '32' - - '32' - '64' + - '64' + - '128' - '32' - input_idx: 78 + input_idx: 310 common: repetitions: 2 timeout: null @@ -24848,36 +25512,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-112-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-112-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 + m: 64 + n: 64 + p: 128 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - - '32' - '64' + - '64' + - '128' - '32' - input_idx: 79 + input_idx: 311 common: repetitions: 2 timeout: null @@ -24886,36 +25550,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + 
traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 + m: 64 + n: 64 + p: 128 + mode: deterministic + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' - '64' + - '64' + - '128' - '32' - input_idx: 80 + input_idx: 312 common: repetitions: 2 timeout: null @@ -24924,36 +25588,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 + m: 64 + n: 64 + p: 128 + mode: deterministic + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '32' - - '32' - '64' + - '64' + - '128' - '32' - input_idx: 81 + input_idx: 313 common: repetitions: 2 timeout: null @@ -24962,36 +25626,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 + m: 64 + n: 64 + p: 128 + mode: 
deterministic + threads: 4 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' - '64' + - '64' + - '128' - '32' - input_idx: 82 + input_idx: 314 common: repetitions: 2 timeout: null @@ -25000,36 +25664,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 - p: 64 - mode: nondeterministic + m: 64 + n: 64 + p: 128 + mode: deterministic threads: 8 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - - '32' - '64' + - '64' + - '128' - '32' - input_idx: 83 + input_idx: 315 common: repetitions: 2 timeout: null @@ -25038,36 +25702,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul 
executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 128 - mode: serial - threads: 4 + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '128' - '32' - input_idx: 84 + input_idx: 316 common: repetitions: 2 timeout: null @@ -25076,36 +25740,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 128 - mode: serial - threads: 4 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '128' - '32' - input_idx: 85 + input_idx: 317 common: repetitions: 2 timeout: null @@ -25114,36 +25778,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 128 - mode: serial + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '128' - '32' - input_idx: 86 + input_idx: 318 common: repetitions: 2 timeout: null @@ -25152,36 +25816,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 128 - mode: serial + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '128' - '32' - input_idx: 87 + input_idx: 319 common: repetitions: 2 timeout: null @@ -25190,36 +25854,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 128 - mode: serial + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' - 
- '32' + - '64' + - '64' - '128' - '32' - input_idx: 88 + input_idx: 320 common: repetitions: 2 timeout: null @@ -25228,36 +25892,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-32-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 128 - mode: serial + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '128' - '32' - input_idx: 89 + input_idx: 321 common: repetitions: 2 timeout: null @@ -25266,36 +25930,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-32-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul 
executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 128 - mode: deterministic + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '128' - '32' - input_idx: 90 + input_idx: 322 common: repetitions: 2 timeout: null @@ -25304,36 +25968,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 128 - mode: deterministic + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 28 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '128' - '32' - input_idx: 91 + input_idx: 323 common: repetitions: 2 timeout: null @@ -25342,36 +26006,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-10-threads-4/simulate 
+ traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 128 - mode: deterministic - threads: 4 + mode: nondeterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '128' - '32' - input_idx: 92 + input_idx: 324 common: repetitions: 2 timeout: null @@ -25380,36 +26044,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 128 - mode: deterministic - threads: 4 + mode: nondeterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '128' - '32' - input_idx: 93 + input_idx: 325 common: repetitions: 2 timeout: null @@ -25418,36 +26082,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 128 - mode: deterministic - threads: 4 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 
cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '128' - '32' - input_idx: 94 + input_idx: 326 common: repetitions: 2 timeout: null @@ -25456,36 +26120,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '128' - '32' - input_idx: 95 + input_idx: 327 common: repetitions: 2 timeout: null @@ -25494,36 +26158,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: 
./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 128 - mode: deterministic + mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '128' - '32' - input_idx: 96 + input_idx: 328 common: repetitions: 2 timeout: null @@ -25532,36 +26196,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 32 + m: 64 + n: 64 p: 128 - mode: deterministic + mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 28 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - - '32' + - '64' + - '64' - '128' - '32' - input_idx: 97 + input_idx: 329 common: repetitions: 2 timeout: null @@ -25570,36 +26234,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-10-threads-8/simulate 
+ traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 128 - mode: deterministic - threads: 8 + m: 64 + n: 128 + p: 32 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 98 + - '32' + input_idx: 330 common: repetitions: 2 timeout: null @@ -25608,36 +26272,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 128 - mode: deterministic - threads: 8 + m: 64 + n: 128 + p: 32 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: true args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 99 + - '32' + input_idx: 331 common: repetitions: 2 timeout: null @@ -25646,36 +26310,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 128 - mode: deterministic - threads: 8 + m: 64 + n: 128 + p: 32 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 
memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 100 + - '32' + input_idx: 332 common: repetitions: 2 timeout: null @@ -25684,36 +26348,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-112-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-112-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 128 - mode: deterministic - threads: 8 + m: 64 + n: 128 + p: 32 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 101 + - '32' + input_idx: 333 common: repetitions: 2 timeout: null @@ -25722,36 +26386,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: 
matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 128 - mode: nondeterministic + m: 64 + n: 128 + p: 32 + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 102 + - '32' + input_idx: 334 common: repetitions: 2 timeout: null @@ -25760,36 +26424,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 128 - mode: nondeterministic + m: 64 + n: 128 + p: 32 + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 103 + - '32' + input_idx: 335 common: repetitions: 2 timeout: null @@ -25798,36 +26462,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + 
traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 128 - mode: nondeterministic + m: 64 + n: 128 + p: 32 + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 104 + - '32' + input_idx: 336 common: repetitions: 2 timeout: null @@ -25836,36 +26500,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 - p: 128 - mode: nondeterministic - threads: 4 + m: 64 + n: 128 + p: 32 + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 105 + - '32' + input_idx: 337 common: repetitions: 2 timeout: null @@ -25874,36 +26538,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: 
!Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 - p: 128 - mode: nondeterministic - threads: 4 + m: 64 + n: 128 + p: 32 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 56 - 
cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 106 + - '32' + input_idx: 338 common: repetitions: 2 timeout: null @@ -25912,36 +26576,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 - p: 128 - mode: nondeterministic - threads: 4 + m: 64 + n: 128 + p: 32 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 107 + - '32' + input_idx: 339 common: repetitions: 2 timeout: null @@ -25950,36 +26614,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: 
./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 128 + m: 64 + n: 128 + p: 32 mode: nondeterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 108 + - '32' + input_idx: 340 common: repetitions: 2 timeout: null @@ -25988,36 +26652,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 128 + m: 64 + n: 128 + p: 32 mode: nondeterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 109 + - '32' + input_idx: 341 common: repetitions: 2 timeout: null @@ -26026,36 +26690,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + 
traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 128 + m: 64 + n: 128 + p: 32 mode: nondeterministic threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 110 + - '32' + input_idx: 342 common: repetitions: 2 timeout: null @@ -26064,36 +26728,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 128 + m: 64 + n: 128 + p: 32 mode: nondeterministic threads: 4 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 111 + - '32' + input_idx: 343 common: repetitions: 2 timeout: null @@ -26102,36 +26766,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 128 + m: 64 + n: 128 + p: 32 mode: nondeterministic threads: 4 run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + 
cores_per_cluster: 4 memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 112 + - '32' + input_idx: 344 common: repetitions: 2 timeout: null @@ -26140,36 +26804,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 128 + m: 64 + n: 128 + p: 32 mode: nondeterministic threads: 4 run_ahead: 10 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 113 + - '32' + input_idx: 345 common: repetitions: 2 timeout: null @@ -26178,24 +26842,24 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: 
matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 - p: 128 + m: 64 + n: 128 + p: 32 mode: nondeterministic threads: 8 run_ahead: 5 @@ -26203,11 +26867,11 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 114 + - '32' + input_idx: 346 common: repetitions: 2 timeout: null @@ -26216,36 +26880,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 - p: 128 + m: 64 + n: 128 + p: 32 mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 115 + - '32' + input_idx: 347 common: repetitions: 2 timeout: null @@ -26254,36 +26918,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 32 - p: 128 + m: 64 + n: 128 + p: 32 mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 116 + - '32' + input_idx: 348 common: repetitions: 2 timeout: null @@ -26292,36 +26956,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 32 - p: 128 + m: 64 + n: 128 + p: 32 mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 117 + - '32' + input_idx: 349 common: repetitions: 2 timeout: null @@ -26330,36 +26994,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 32 - p: 128 + m: 64 + n: 128 + p: 32 mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + run_ahead: 10 + 
num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 118 + - '32' + input_idx: 350 common: repetitions: 2 timeout: null @@ -26368,36 +27032,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 32 - p: 128 + m: 64 + n: 128 + p: 32 mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - - '32' + - '64' - '128' - '32' - input_idx: 119 + - '32' + input_idx: 351 common: repetitions: 2 timeout: null @@ -26406,36 +27070,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul 
executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 + m: 64 + n: 128 + p: 64 + mode: serial + threads: 4 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - '64' - '128' + - '64' - '32' - input_idx: 120 + input_idx: 352 common: repetitions: 2 timeout: null @@ -26444,36 +27108,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 + m: 64 + n: 128 + p: 64 + mode: serial + threads: 4 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '32' - - '32' + - '64' - '128' + - '64' - '32' - input_idx: 121 + input_idx: 353 common: repetitions: 2 timeout: null @@ -26482,36 +27146,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 + m: 64 + n: 128 + p: 64 + mode: serial + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '32' - - '32' + - '64' - '128' + - '64' - '32' - input_idx: 122 + input_idx: 354 common: repetitions: 2 timeout: null @@ -26520,36 +27184,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-112-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-112-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + m: 64 + n: 128 + p: 64 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '32' - - '32' + - '64' - '128' + - '64' - '32' - input_idx: 123 + input_idx: 355 common: repetitions: 2 timeout: null @@ -26558,36 +27222,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate 
- stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 + m: 64 + n: 128 + p: 64 + mode: deterministic + threads: 4 + run_ahead: 
5 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '32' - - '32' + - '64' - '128' + - '64' - '32' - input_idx: 124 + input_idx: 356 common: repetitions: 2 timeout: null @@ -26596,36 +27260,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + m: 64 + n: 128 + p: 64 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '32' - - '32' + - '64' - '128' + - '64' - '32' - input_idx: 125 + input_idx: 357 common: repetitions: 2 timeout: null @@ -26634,36 +27298,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-4 path: 
/home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: serial + m: 64 + n: 128 + p: 64 + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' + - '64' + - '128' - '64' - '32' - - '32' - input_idx: 126 + input_idx: 358 common: repetitions: 2 timeout: null @@ -26672,36 +27336,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: serial - threads: 4 + m: 64 + n: 128 + p: 64 + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' + - '64' + - '128' - '64' - '32' - - '32' - input_idx: 127 + input_idx: 359 common: repetitions: 2 timeout: null @@ -26710,36 +27374,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: serial - threads: 4 + m: 64 + n: 128 + p: 64 + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '32' + - '64' + - '128' - '64' - '32' - - '32' - input_idx: 128 + input_idx: 360 common: repetitions: 2 timeout: null @@ -26748,36 +27412,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: serial - threads: 4 + m: 64 + n: 128 + p: 64 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '32' + - '64' + - '128' - '64' - '32' - - '32' - input_idx: 129 + input_idx: 361 common: repetitions: 2 timeout: null @@ -26786,36 +27450,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: serial + m: 64 + n: 128 + p: 64 + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '32' + 
- '64' + - '128' - '64' - '32' - - '32' - input_idx: 130 + input_idx: 362 common: repetitions: 2 timeout: null @@ -26824,36 +27488,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: serial + m: 64 + n: 128 + p: 64 + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '32' + - '64' + - '128' - '64' - '32' - - '32' - input_idx: 131 + input_idx: 363 common: repetitions: 2 timeout: null @@ -26862,36 +27526,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: 
./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: deterministic + m: 64 + n: 128 + p: 64 + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' + - '64' + - '128' - '64' - '32' - - '32' - input_idx: 132 + input_idx: 364 common: repetitions: 2 timeout: null @@ -26900,36 +27564,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: deterministic + m: 64 + n: 128 + p: 64 + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' + - '64' + - '128' - '64' - '32' - - '32' - input_idx: 133 + input_idx: 365 common: repetitions: 2 timeout: null @@ -26938,36 +27602,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: deterministic + m: 64 + n: 128 + p: 64 + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '32' + - '64' + - '128' - '64' - '32' - - '32' - input_idx: 134 + input_idx: 366 common: repetitions: 2 timeout: null @@ -26976,36 +27640,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: deterministic + m: 64 + n: 128 + p: 64 + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '32' + - '64' + - '128' - '64' - '32' - - '32' - input_idx: 135 + input_idx: 367 common: repetitions: 2 timeout: null @@ -27014,36 +27678,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: deterministic - threads: 4 + m: 64 + n: 128 + p: 64 + mode: nondeterministic + threads: 8 run_ahead: 5 - 
num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '32' + - '64' + - '128' - '64' - '32' - - '32' - input_idx: 136 + input_idx: 368 common: repetitions: 2 timeout: null @@ -27052,36 +27716,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: deterministic - threads: 4 + m: 64 + n: 128 + p: 64 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '32' + - '64' + - '128' - '64' - '32' - - '32' - input_idx: 137 + input_idx: 369 common: repetitions: 2 timeout: null @@ -27090,36 +27754,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-8 path: 
/home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: deterministic + m: 64 + n: 128 + p: 64 + mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' + - '64' + - '128' - '64' - '32' - - '32' - input_idx: 138 + input_idx: 370 common: repetitions: 2 timeout: null @@ -27128,36 +27792,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: deterministic + m: 64 + n: 128 + p: 64 + mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' + - '64' + - '128' - '64' - '32' - - '32' - input_idx: 139 + input_idx: 371 common: repetitions: 2 timeout: null @@ -27166,36 +27830,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: deterministic + m: 64 + n: 128 + p: 64 + mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '32' + - '64' + - '128' - '64' - '32' - - '32' - input_idx: 140 + input_idx: 372 common: repetitions: 2 timeout: null @@ -27204,36 +27868,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: deterministic + m: 64 + n: 128 + p: 64 + mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '32' + - '64' + - '128' - '64' - '32' - - '32' - input_idx: 141 + input_idx: 373 common: repetitions: 2 timeout: null @@ -27242,36 +27906,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: deterministic - threads: 8 + m: 64 + n: 128 + p: 128 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 
cores_per_cluster: 1 memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 142 + input_idx: 374 common: repetitions: 2 timeout: null @@ -27280,36 +27944,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: deterministic - threads: 8 + m: 64 + n: 128 + p: 128 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 143 + input_idx: 375 common: repetitions: 2 timeout: null @@ -27318,36 +27982,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: nondeterministic + m: 64 + n: 128 + p: 128 + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 144 + input_idx: 376 common: repetitions: 2 timeout: null @@ -27356,36 +28020,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + 
uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-112-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-112-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: nondeterministic + m: 64 + n: 128 + p: 128 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 145 + input_idx: 377 common: repetitions: 2 timeout: null @@ -27394,36 +28058,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: nondeterministic + m: 64 + n: 128 + p: 128 + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 146 + input_idx: 378 common: repetitions: 2 timeout: null @@ -27432,36 +28096,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: nondeterministic + m: 64 + n: 128 + p: 128 + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 147 + input_idx: 379 common: repetitions: 2 timeout: null @@ -27470,36 +28134,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: nondeterministic + m: 64 + n: 128 + p: 128 + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 
1 memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 148 + input_idx: 380 common: repetitions: 2 timeout: null @@ -27508,36 +28172,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 + m: 64 + n: 128 + p: 128 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 149 + input_idx: 381 common: repetitions: 2 timeout: null @@ -27546,36 +28210,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: 
./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 + m: 64 + n: 128 + p: 128 + mode: deterministic + threads: 8 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 150 + input_idx: 382 common: repetitions: 2 timeout: null @@ -27584,36 +28248,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 + m: 64 + n: 128 + p: 128 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 151 + input_idx: 383 common: repetitions: 2 timeout: null @@ -27622,36 +28286,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 32 + m: 64 + n: 128 + p: 128 mode: nondeterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 152 + input_idx: 384 common: repetitions: 2 timeout: null @@ -27660,36 +28324,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 32 + m: 64 + n: 128 + p: 128 mode: nondeterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: 
true + cores_per_cluster: 4 + memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 153 + input_idx: 385 common: repetitions: 2 timeout: null @@ -27698,36 +28362,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 32 + m: 64 + n: 128 + p: 128 mode: nondeterministic threads: 4 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 154 + input_idx: 386 common: repetitions: 2 timeout: null @@ -27736,36 +28400,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: 
matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 32 + m: 64 + n: 128 + p: 128 mode: nondeterministic threads: 4 run_ahead: 10 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 155 + input_idx: 387 common: repetitions: 2 timeout: null @@ -27774,36 +28438,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 32 + m: 64 + n: 128 + p: 128 mode: nondeterministic - threads: 8 - run_ahead: 5 + threads: 4 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 156 + input_idx: 388 common: repetitions: 2 timeout: null @@ -27812,36 +28476,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4/simulate + 
traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 32 + m: 64 + n: 128 + p: 128 mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 + threads: 4 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 157 + input_idx: 389 common: repetitions: 2 timeout: null @@ -27850,36 +28514,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 32 + m: 64 + n: 128 + p: 128 mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 158 + input_idx: 390 common: repetitions: 2 timeout: null @@ -27888,36 +28552,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 32 + m: 64 + n: 128 + p: 128 mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + 
cores_per_cluster: 4 + memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 159 + input_idx: 391 common: repetitions: 2 timeout: null @@ -27926,36 +28590,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 32 + m: 64 + n: 128 + p: 128 mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 160 + input_idx: 392 common: repetitions: 2 timeout: null @@ -27964,36 +28628,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 32 + m: 64 + n: 128 + p: 128 mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 161 + input_idx: 393 common: repetitions: 2 timeout: null @@ -28002,36 +28666,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 32 + m: 64 + n: 128 + p: 128 mode: nondeterministic threads: 8 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 162 + input_idx: 394 common: repetitions: 2 timeout: null @@ -28040,36 +28704,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 32 + m: 64 + n: 128 + p: 128 mode: nondeterministic threads: 8 run_ahead: 10 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - '64' + - '128' + - '128' - '32' - - '32' - input_idx: 163 + input_idx: 395 common: repetitions: 2 timeout: null @@ -28078,36 +28742,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 + mode: serial + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: + - '128' - '32' - - '64' - '32' - '32' - input_idx: 164 + input_idx: 396 common: repetitions: 2 timeout: null @@ -28116,36 +28780,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 + mode: serial + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 
memory_only: true args: + - '128' - '32' - - '64' - '32' - '32' - input_idx: 165 + input_idx: 397 common: repetitions: 2 timeout: null @@ -28154,36 +28818,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: + - '128' - '32' - - '64' - '32' - '32' - input_idx: 166 + input_idx: 398 common: repetitions: 2 timeout: null @@ -28192,36 +28856,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-112-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-112-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - '128' - '32' - - '64' - '32' - '32' - input_idx: 167 + input_idx: 399 common: repetitions: 2 timeout: null @@ -28230,36 +28894,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 64 - mode: serial + m: 128 + n: 32 + p: 32 + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: + - '128' - '32' - - '64' - - '64' - '32' - input_idx: 168 + - '32' + input_idx: 400 common: repetitions: 2 timeout: null @@ -28268,36 +28932,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 64 - mode: serial + m: 128 + n: 32 + p: 32 + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: + - '128' - '32' - - '64' - - '64' - '32' - input_idx: 169 + - '32' + input_idx: 401 common: repetitions: 2 timeout: null @@ -28306,36 +28970,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 64 - mode: serial + m: 128 + n: 32 + p: 32 + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: + - '128' - '32' - - '64' - - '64' - '32' - input_idx: 170 + - '32' + input_idx: 402 common: repetitions: 2 timeout: null @@ -28344,36 +29008,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 64 - mode: serial - threads: 4 + m: 128 + n: 32 + p: 32 + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + 
memory_only: false args: + - '128' - '32' - - '64' - - '64' - '32' - input_idx: 171 + - '32' + input_idx: 403 common: repetitions: 2 timeout: null @@ -28382,36 +29046,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 64 - mode: serial - threads: 4 + m: 128 + n: 32 + p: 32 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: + - '128' - '32' - - '64' - - '64' - '32' - input_idx: 172 + - '32' + input_idx: 404 common: repetitions: 2 timeout: null @@ -28420,36 +29084,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 64 - mode: serial - threads: 4 + m: 128 + n: 32 + p: 32 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - '128' - '32' - - '64' - - '64' - '32' - input_idx: 173 + - '32' + input_idx: 405 common: repetitions: 2 timeout: null @@ -28458,36 +29122,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 64 - mode: deterministic + m: 128 + n: 32 + p: 32 + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: + - '128' - '32' - - '64' - - '64' - '32' - input_idx: 174 + - '32' + input_idx: 406 common: repetitions: 2 timeout: null @@ -28496,36 +29160,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 64 - mode: deterministic + m: 128 + n: 32 + p: 32 + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: + - '128' - '32' - - '64' - - '64' - '32' - input_idx: 175 + - '32' + input_idx: 407 common: repetitions: 2 timeout: null @@ -28534,36 +29198,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 64 - mode: deterministic + m: 128 + n: 32 + p: 32 + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: + - '128' - '32' - - '64' - - '64' - '32' - input_idx: 176 + - '32' + input_idx: 408 common: repetitions: 2 timeout: null @@ -28572,36 +29236,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 64 - mode: deterministic + m: 128 + n: 32 + p: 32 + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - 
memory_only: true + cores_per_cluster: 1 + memory_only: false args: + - '128' - '32' - - '64' - - '64' - '32' - input_idx: 177 + - '32' + input_idx: 409 common: repetitions: 2 timeout: null @@ -28610,36 +29274,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 64 - mode: deterministic + m: 128 + n: 32 + p: 32 + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + run_ahead: 10 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: + - '128' - '32' - - '64' - - '64' - '32' - input_idx: 178 + - '32' + input_idx: 410 common: repetitions: 2 timeout: null @@ -28648,36 +29312,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: 
./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 64 - mode: deterministic + m: 128 + n: 32 + p: 32 + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - '128' - '32' - - '64' - - '64' - '32' - input_idx: 179 + - '32' + input_idx: 411 common: repetitions: 2 timeout: null @@ -28686,36 +29350,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 64 - mode: deterministic + m: 128 + n: 32 + p: 32 + mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: + - '128' - '32' - - '64' - - '64' - '32' - input_idx: 180 + - '32' + input_idx: 412 common: repetitions: 2 timeout: null @@ -28724,36 +29388,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 64 - mode: deterministic + m: 128 + n: 32 + p: 32 + mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: + - '128' - '32' - - '64' - - '64' - '32' - input_idx: 181 + - '32' + input_idx: 413 common: repetitions: 2 timeout: null @@ -28762,36 +29426,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 64 - mode: deterministic + m: 128 + n: 32 + p: 32 + mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: + - '128' - '32' - - '64' - - '64' - '32' - input_idx: 182 + - '32' + input_idx: 414 common: repetitions: 2 timeout: null @@ -28800,36 +29464,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 64 - mode: deterministic + m: 128 + n: 32 + p: 32 + mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - 
memory_only: true + cores_per_cluster: 1 + memory_only: false args: + - '128' - '32' - - '64' - - '64' - '32' - input_idx: 183 + - '32' + input_idx: 415 common: repetitions: 2 timeout: null @@ -28838,36 +29502,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 64 - mode: deterministic + m: 128 + n: 32 + p: 32 + mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + run_ahead: 10 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: + - '128' - '32' - - '64' - - '64' - '32' - input_idx: 184 + - '32' + input_idx: 416 common: repetitions: 2 timeout: null @@ -28876,36 +29540,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: 
./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 64 - p: 64 - mode: deterministic + m: 128 + n: 32 + p: 32 + mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - '128' - '32' - - '64' - - '64' - '32' - input_idx: 185 + - '32' + input_idx: 417 common: repetitions: 2 timeout: null @@ -28914,36 +29578,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 - mode: nondeterministic + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 186 + input_idx: 418 common: repetitions: 2 timeout: null @@ -28952,36 +29616,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace 
+ accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 - mode: nondeterministic + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: true args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 187 + input_idx: 419 common: repetitions: 2 timeout: null @@ -28990,36 +29654,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 - mode: nondeterministic + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 188 + input_idx: 420 common: repetitions: 2 timeout: null @@ -29028,36 +29692,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-112-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-112-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 - mode: nondeterministic + mode: serial threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 189 + input_idx: 421 common: repetitions: 2 timeout: null @@ -29066,36 +29730,36 @@ benchmarks: results_dir: 
/home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 - mode: nondeterministic + mode: deterministic threads: 4 run_ahead: 
5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 190 + input_idx: 422 common: repetitions: 2 timeout: null @@ -29104,36 +29768,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 - mode: nondeterministic + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 191 + input_idx: 423 common: repetitions: 2 timeout: null @@ -29142,36 +29806,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul 
executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 - mode: nondeterministic + mode: deterministic threads: 4 - run_ahead: 10 - num_clusters: 28 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 192 + input_idx: 424 common: repetitions: 2 timeout: null @@ -29180,36 +29844,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 + mode: deterministic + threads: 8 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 193 + input_idx: 425 common: repetitions: 2 timeout: null @@ -29218,36 +29882,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 + mode: deterministic + threads: 8 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 194 + input_idx: 426 common: repetitions: 2 timeout: null @@ -29256,36 +29920,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 195 + input_idx: 427 common: repetitions: 2 timeout: null @@ -29294,36 +29958,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: 
!Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 mode: nondeterministic threads: 4 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 28 
cores_per_cluster: 1 memory_only: false args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 196 + input_idx: 428 common: repetitions: 2 timeout: null @@ -29332,36 +29996,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 mode: nondeterministic threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 197 + input_idx: 429 common: repetitions: 2 timeout: null @@ -29370,36 +30034,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul 
executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 198 + input_idx: 430 common: repetitions: 2 timeout: null @@ -29408,36 +30072,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 mode: nondeterministic - threads: 8 - run_ahead: 5 + threads: 4 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 199 + input_idx: 431 common: repetitions: 2 timeout: null @@ -29446,36 +30110,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 mode: nondeterministic - threads: 8 - run_ahead: 5 + threads: 4 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 200 + input_idx: 432 common: repetitions: 2 timeout: null @@ -29484,36 +30148,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + threads: 4 + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 201 + input_idx: 433 common: repetitions: 2 timeout: null @@ -29522,36 +30186,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - 
stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false 
args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 202 + input_idx: 434 common: repetitions: 2 timeout: null @@ -29560,36 +30224,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 203 + input_idx: 435 common: repetitions: 2 timeout: null @@ -29598,36 +30262,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 mode: nondeterministic threads: 8 - run_ahead: 10 - num_clusters: 28 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 204 + input_idx: 436 common: repetitions: 2 timeout: null @@ -29636,36 +30300,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 mode: nondeterministic threads: 8 run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 205 + input_idx: 437 common: repetitions: 2 timeout: null @@ -29674,36 +30338,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 mode: nondeterministic threads: 8 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 206 + input_idx: 438 common: repetitions: 2 timeout: null @@ -29712,36 +30376,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 64 mode: nondeterministic threads: 8 run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: + - '128' - '32' - '64' - - '64' - '32' - input_idx: 207 + input_idx: 439 common: repetitions: 2 timeout: null @@ -29750,36 +30414,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 + m: 128 + n: 32 + p: 128 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 28 
cores_per_cluster: 1 memory_only: false args: + - '128' - '32' - - '64' - - '64' + - '128' - '32' - input_idx: 208 + input_idx: 440 common: repetitions: 2 timeout: null @@ -29788,36 +30452,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 + m: 128 + n: 32 + p: 128 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 memory_only: true args: + - '128' - '32' - - '64' - - '64' + - '128' - '32' - input_idx: 209 + input_idx: 441 common: repetitions: 2 timeout: null @@ -29826,36 +30490,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 128 mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: + - '128' - '32' - - '64' - '128' - '32' - input_idx: 210 + input_idx: 442 common: repetitions: 2 timeout: null @@ -29864,36 +30528,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-112-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-112-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 128 mode: serial threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - '128' - '32' - - '64' - '128' - '32' - input_idx: 211 + input_idx: 443 common: repetitions: 2 timeout: null @@ -29902,36 +30566,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: 
null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 128 - mode: serial + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: + - '128' - '32' - - '64' - '128' - '32' - input_idx: 212 + input_idx: 444 common: repetitions: 2 timeout: null @@ -29940,36 +30604,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 128 - mode: serial + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: + - '128' - '32' - - '64' - '128' - '32' - input_idx: 213 + input_idx: 445 common: repetitions: 2 timeout: null @@ -29978,36 +30642,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 128 - mode: serial + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: + - '128' - '32' - - '64' - '128' - '32' - input_idx: 214 + input_idx: 446 common: repetitions: 2 timeout: null @@ -30016,36 +30680,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-64-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 128 - mode: serial - threads: 4 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + 
memory_only: false args: + - '128' - '32' - - '64' - '128' - '32' - input_idx: 215 + input_idx: 447 common: repetitions: 2 timeout: null @@ -30054,36 +30718,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-64-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 128 mode: deterministic - threads: 4 + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: + - '128' - '32' - - '64' - '128' - '32' - input_idx: 216 + input_idx: 448 common: repetitions: 2 timeout: null @@ -30092,36 +30756,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 128 mode: deterministic - threads: 4 + threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - '128' - '32' - - '64' - '128' - '32' - input_idx: 217 + input_idx: 449 common: repetitions: 2 timeout: null @@ -30130,36 +30794,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 128 - mode: deterministic + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: + - '128' - '32' - - '64' - '128' - '32' - input_idx: 218 + input_idx: 450 common: repetitions: 2 timeout: null @@ -30168,36 +30832,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 128 - mode: deterministic + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: + - '128' - '32' - - '64' - '128' - '32' - input_idx: 219 + input_idx: 451 common: repetitions: 2 timeout: null @@ -30206,36 +30870,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 128 - mode: deterministic + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: + - '128' - '32' - - '64' - '128' - '32' - input_idx: 220 + input_idx: 452 common: repetitions: 2 timeout: null @@ -30244,36 +30908,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 128 - mode: deterministic + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 28 
cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - '128' - '32' - - '64' - '128' - '32' - input_idx: 221 + input_idx: 453 common: repetitions: 2 timeout: null @@ -30282,36 +30946,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 + mode: nondeterministic + threads: 4 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: + - '128' - '32' - - '64' - '128' - '32' - input_idx: 222 + input_idx: 454 common: repetitions: 2 timeout: null @@ -30320,36 +30984,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul 
executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - '128' - '32' - - '64' - '128' - '32' - input_idx: 223 + input_idx: 455 common: repetitions: 2 timeout: null @@ -30358,36 +31022,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 128 - mode: deterministic + mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: + - '128' - '32' - - '64' - '128' - '32' - input_idx: 224 + input_idx: 456 common: repetitions: 2 timeout: null @@ -30396,36 +31060,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 128 - mode: deterministic + mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: + - '128' - '32' - - '64' - '128' - '32' - input_idx: 225 + input_idx: 457 common: repetitions: 2 timeout: null @@ -30434,36 +31098,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 128 - mode: deterministic + mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: + - '128' - '32' - - '64' - '128' - '32' - input_idx: 226 + input_idx: 458 common: repetitions: 2 timeout: null @@ -30472,36 +31136,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 128 - mode: deterministic + mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 28 
cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - '128' - '32' - - '64' - '128' - '32' - input_idx: 227 + input_idx: 459 common: repetitions: 2 timeout: null @@ -30510,36 +31174,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 128 mode: nondeterministic - threads: 4 - run_ahead: 5 + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: + - '128' - '32' - - '64' - '128' - '32' - input_idx: 228 + input_idx: 460 common: repetitions: 2 timeout: null @@ -30548,36 +31212,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: 
matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 64 + m: 128 + n: 32 p: 128 mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 + threads: 8 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: + - '128' - '32' - - '64' - '128' - '32' - input_idx: 229 + input_idx: 461 common: repetitions: 2 timeout: null @@ -30586,36 +31250,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 - mode: nondeterministic + p: 32 + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 230 + - '32' + input_idx: 462 common: repetitions: 2 timeout: null @@ -30624,36 +31288,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 - mode: nondeterministic + p: 32 + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: true args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 231 + - '32' + input_idx: 463 common: repetitions: 2 timeout: null @@ -30662,36 +31326,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 - mode: nondeterministic + p: 32 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 232 + - '32' + input_idx: 464 common: repetitions: 2 timeout: null @@ -30700,36 +31364,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-112-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-112-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 - mode: nondeterministic + p: 32 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: 
false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 233 + - '32' + input_idx: 465 common: repetitions: 2 timeout: null @@ -30738,36 +31402,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 - mode: nondeterministic + p: 32 + mode: deterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 234 + - '32' + input_idx: 466 common: repetitions: 2 timeout: null @@ -30776,36 +31440,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 - mode: nondeterministic + p: 32 + mode: deterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 235 + - '32' + input_idx: 467 common: repetitions: 2 timeout: null @@ -30814,36 +31478,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 - mode: nondeterministic + p: 32 + mode: deterministic threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 236 + - '32' + input_idx: 468 common: repetitions: 2 timeout: null @@ -30852,36 +31516,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 + p: 32 + mode: deterministic + threads: 8 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 237 + - '32' + input_idx: 469 common: repetitions: 2 timeout: null @@ -30890,36 +31554,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 + p: 32 + mode: deterministic + threads: 8 + 
run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 238 + - '32' + input_idx: 470 common: repetitions: 2 timeout: null @@ -30928,36 +31592,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 + p: 32 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 239 + - '32' + input_idx: 471 common: repetitions: 2 timeout: null @@ -30966,36 +31630,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul 
rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 + p: 32 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 240 + - '32' + input_idx: 472 common: repetitions: 2 timeout: null @@ -31004,36 +31668,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 + p: 32 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 241 + - '32' + input_idx: 473 common: repetitions: 2 timeout: null @@ -31042,36 +31706,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 + p: 32 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 242 + - '32' + input_idx: 474 common: repetitions: 2 timeout: null @@ -31080,36 +31744,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 + p: 32 mode: nondeterministic - threads: 8 - run_ahead: 5 + threads: 4 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 243 + - '32' + input_idx: 475 common: repetitions: 2 timeout: null @@ -31118,36 +31782,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 + p: 32 mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + threads: 4 + run_ahead: 10 + 
num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 244 + - '32' + input_idx: 476 common: repetitions: 2 timeout: null @@ -31156,36 +31820,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-10-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 + p: 32 mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 + threads: 4 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 245 + - '32' + input_idx: 477 common: repetitions: 2 timeout: null @@ -31194,36 +31858,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: 
./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 + p: 32 mode: nondeterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 246 + - '32' + input_idx: 478 common: repetitions: 2 timeout: null @@ -31232,36 +31896,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 + p: 32 mode: nondeterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 247 + - '32' + input_idx: 479 common: repetitions: 2 timeout: null @@ -31270,36 +31934,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + 
traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 + p: 32 mode: nondeterministic threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 248 + - '32' + input_idx: 480 common: repetitions: 2 timeout: null @@ -31308,36 +31972,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 + p: 32 mode: nondeterministic threads: 8 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 249 + - '32' + input_idx: 481 common: repetitions: 2 timeout: null @@ -31346,36 +32010,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 + p: 32 mode: nondeterministic threads: 8 run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + 
cores_per_cluster: 4 memory_only: false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 250 + - '32' + input_idx: 482 common: repetitions: 2 timeout: null @@ -31384,36 +32048,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-10-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 64 - p: 128 + p: 32 mode: nondeterministic threads: 8 run_ahead: 10 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - - '64' - '128' + - '64' - '32' - input_idx: 251 + - '32' + input_idx: 483 common: repetitions: 2 timeout: null @@ -31422,24 +32086,24 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-64-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 64 mode: serial threads: 4 run_ahead: 5 @@ -31447,11 +32111,11 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 252 + input_idx: 484 common: repetitions: 2 timeout: null @@ -31460,24 +32124,24 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 64 mode: serial threads: 4 run_ahead: 5 @@ -31485,11 +32149,11 @@ benchmarks: cores_per_cluster: 1 memory_only: true args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 253 + input_idx: 485 common: repetitions: 2 timeout: null @@ -31498,36 +32162,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: 
null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 64 mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 254 + input_idx: 486 common: repetitions: 2 timeout: null @@ -31536,36 +32200,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-112-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-112-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 64 mode: serial threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 255 + input_idx: 487 common: repetitions: 2 timeout: null @@ -31574,36 +32238,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 - mode: serial + m: 128 + n: 64 + p: 64 + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 256 + input_idx: 488 common: repetitions: 2 timeout: null @@ -31612,36 +32276,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 - mode: serial + m: 128 + n: 64 + p: 64 + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + 
cores_per_cluster: 4 + memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 257 + input_idx: 489 common: repetitions: 2 timeout: null @@ -31650,36 +32314,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 64 mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 258 + input_idx: 490 common: repetitions: 2 timeout: null @@ -31688,36 +32352,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 64 mode: deterministic - threads: 4 + threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 259 + input_idx: 491 common: repetitions: 2 timeout: null @@ -31726,36 +32390,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + 
uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 64 mode: deterministic - threads: 4 + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 260 + input_idx: 492 common: repetitions: 2 timeout: null @@ -31764,36 +32428,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 64 mode: deterministic - threads: 4 + threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 261 + input_idx: 493 common: repetitions: 2 timeout: null @@ -31802,36 +32466,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 - mode: deterministic + m: 128 + n: 64 + p: 64 + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 262 + input_idx: 494 common: repetitions: 2 timeout: null @@ -31840,36 +32504,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 - mode: deterministic + m: 128 + n: 64 + p: 64 + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: 
true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 263 + input_idx: 495 common: repetitions: 2 timeout: null @@ -31878,36 +32542,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 - mode: deterministic - threads: 8 + m: 128 + n: 64 + p: 64 + mode: nondeterministic + threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 264 + input_idx: 496 common: repetitions: 2 timeout: null @@ -31916,36 +32580,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul 
executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 + m: 128 + n: 64 + p: 64 + mode: nondeterministic + threads: 4 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 265 + input_idx: 497 common: repetitions: 2 timeout: null @@ -31954,36 +32618,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 + m: 128 + n: 64 + p: 64 + mode: nondeterministic + threads: 4 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 266 + input_idx: 498 common: repetitions: 2 timeout: null @@ -31992,36 +32656,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + m: 128 + n: 64 + p: 64 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 267 + input_idx: 499 common: repetitions: 2 timeout: null @@ -32030,36 +32694,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 128 - p: 32 - mode: deterministic + m: 128 + n: 64 + p: 64 + mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 
1 memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 268 + input_idx: 500 common: repetitions: 2 timeout: null @@ -32068,36 +32732,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 128 - p: 32 - mode: deterministic + m: 128 + n: 64 + p: 64 + mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 269 + input_idx: 501 common: repetitions: 2 timeout: null @@ -32106,36 +32770,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul 
rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 64 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 270 + input_idx: 502 common: repetitions: 2 timeout: null @@ -32144,36 +32808,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 64 mode: nondeterministic - threads: 4 - run_ahead: 5 + threads: 8 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 271 + input_idx: 503 common: repetitions: 2 timeout: null @@ -32182,36 +32846,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8/simulate + 
traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 64 mode: nondeterministic - threads: 4 - run_ahead: 5 + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 272 + input_idx: 504 common: repetitions: 2 timeout: null @@ -32220,36 +32884,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 64 mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + threads: 8 + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '32' - '128' + - '64' + - '64' - '32' - - '32' - input_idx: 273 + input_idx: 505 common: repetitions: 2 timeout: null @@ -32258,36 +32922,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 - mode: nondeterministic + m: 128 + n: 64 + p: 128 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 
28 cores_per_cluster: 1 memory_only: false args: - - '32' + - '128' + - '64' - '128' - '32' - - '32' - input_idx: 274 + input_idx: 506 common: repetitions: 2 timeout: null @@ -32296,36 +32960,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 - mode: nondeterministic + m: 128 + n: 64 + p: 128 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '32' + - '128' + - '64' - '128' - '32' - - '32' - input_idx: 275 + input_idx: 507 common: repetitions: 2 timeout: null @@ -32334,36 +32998,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 - mode: nondeterministic + m: 128 + n: 64 + p: 128 + mode: serial threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '32' + - '128' + - '64' - '128' - '32' - - '32' - input_idx: 276 + input_idx: 508 common: repetitions: 2 timeout: null @@ -32372,36 +33036,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-112-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-112-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 - mode: nondeterministic + m: 128 + n: 64 + p: 128 + mode: serial threads: 4 - run_ahead: 10 - num_clusters: 28 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' + - '128' + - '64' - '128' - '32' - - '32' - input_idx: 277 + input_idx: 509 common: repetitions: 2 timeout: null @@ -32410,36 +33074,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + 
traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 - mode: nondeterministic + m: 128 + n: 64 + p: 128 + mode: deterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '32' + - '128' + - '64' - '128' - '32' - - '32' - input_idx: 278 + input_idx: 510 common: repetitions: 2 timeout: null @@ -32448,36 +33112,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 - mode: nondeterministic + m: 128 + n: 64 + p: 128 + mode: deterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '32' + - '128' + - '64' - '128' - '32' - - '32' - input_idx: 279 + input_idx: 511 common: repetitions: 2 timeout: null @@ -32486,36 +33150,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 - mode: nondeterministic + m: 128 + n: 64 + p: 128 + mode: deterministic threads: 4 - run_ahead: 10 - 
num_clusters: 56 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' + - '128' + - '64' - '128' - '32' - - '32' - input_idx: 280 + input_idx: 512 common: repetitions: 2 timeout: null @@ -32524,36 +33188,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 + m: 128 + n: 64 + p: 128 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' + - '128' + - '64' - '128' - '32' - - '32' - input_idx: 281 + input_idx: 513 common: repetitions: 2 timeout: null @@ -32562,36 +33226,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 path: 
/home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 128 - p: 32 - mode: nondeterministic + m: 128 + n: 64 + p: 128 + mode: deterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '32' + - '128' + - '64' - '128' - '32' - - '32' - input_idx: 282 + input_idx: 514 common: repetitions: 2 timeout: null @@ -32600,36 +33264,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - 
uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 128 - p: 32 - mode: nondeterministic + m: 128 + n: 64 + p: 128 + mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' + - '128' + - '64' - '128' - '32' - - '32' - input_idx: 283 + input_idx: 515 common: repetitions: 2 timeout: null @@ -32638,36 +33302,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 128 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '32' + - '128' + - '64' - '128' - '32' - - '32' - input_idx: 284 + input_idx: 516 common: repetitions: 2 timeout: null @@ -32676,36 +33340,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 128 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: 
true + cores_per_cluster: 4 + memory_only: false args: - - '32' + - '128' + - '64' - '128' - '32' - - '32' - input_idx: 285 + input_idx: 517 common: repetitions: 2 timeout: null @@ -32714,36 +33378,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 128 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' + - '128' + - '64' - '128' - '32' - - '32' - input_idx: 286 + input_idx: 518 common: repetitions: 2 timeout: null @@ -32752,36 +33416,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: 
matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 128 mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 + threads: 4 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' + - '128' + - '64' - '128' - '32' - - '32' - input_idx: 287 + input_idx: 519 common: repetitions: 2 timeout: null @@ -32790,36 +33454,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 128 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '32' + - '128' + - '64' - '128' - '32' - - '32' - input_idx: 288 + input_idx: 520 common: repetitions: 2 timeout: null @@ -32828,36 +33492,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-10-threads-4 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 128 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 10 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' + - '128' + - '64' - '128' - '32' - - '32' - input_idx: 289 + input_idx: 521 common: repetitions: 2 timeout: null @@ -32866,36 +33530,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 128 mode: nondeterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '32' + - '128' + - '64' - '128' - '32' - - '32' - input_idx: 290 + input_idx: 522 common: repetitions: 2 timeout: null @@ -32904,36 +33568,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 128 mode: nondeterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - 
memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '32' + - '128' + - '64' - '128' - '32' - - '32' - input_idx: 291 + input_idx: 523 common: repetitions: 2 timeout: null @@ -32942,36 +33606,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 128 mode: nondeterministic threads: 8 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' + - '128' + - '64' - '128' - '32' - - '32' - input_idx: 292 + input_idx: 524 common: repetitions: 2 timeout: null @@ -32980,36 +33644,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: 
matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 128 - p: 32 + m: 128 + n: 64 + p: 128 mode: nondeterministic threads: 8 run_ahead: 10 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' + - '128' + - '64' - '128' - '32' - - '32' - input_idx: 293 + input_idx: 525 common: repetitions: 2 timeout: null @@ -33018,36 +33682,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 128 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 + m: 128 + n: 64 + p: 128 + mode: nondeterministic + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '32' - '128' - '64' + - '128' - '32' - input_idx: 294 + input_idx: 526 common: repetitions: 2 timeout: null @@ -33056,36 +33720,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-10-threads-8 values: dtype: 32 - m: 32 - n: 128 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 + m: 128 + n: 64 + p: 128 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - '128' - '64' + - '128' - '32' - input_idx: 295 + input_idx: 527 common: repetitions: 2 timeout: null @@ -33094,36 +33758,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-112-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 + p: 32 mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 296 + - '32' + input_idx: 528 common: repetitions: 2 timeout: null @@ -33132,36 +33796,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 + p: 32 mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: true args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 297 + - 
'32' + input_idx: 529 common: repetitions: 2 timeout: null @@ -33170,36 +33834,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 + 
p: 32 mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 298 + - '32' + input_idx: 530 common: repetitions: 2 timeout: null @@ -33208,36 +33872,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-112-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-112-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 + p: 32 mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 299 + - '32' + input_idx: 531 common: repetitions: 2 timeout: null @@ -33246,24 +33910,24 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 + p: 32 mode: deterministic threads: 4 run_ahead: 5 @@ -33271,11 +33935,11 @@ benchmarks: cores_per_cluster: 1 memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 300 + - '32' + input_idx: 532 common: repetitions: 2 timeout: null @@ -33284,36 +33948,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: 
simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 + p: 32 mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 301 + - '32' + input_idx: 533 common: repetitions: 2 timeout: null @@ -33322,36 +33986,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 + p: 32 mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 302 + - '32' + input_idx: 534 common: repetitions: 2 timeout: null @@ -33360,36 +34024,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 + p: 32 mode: deterministic - threads: 4 + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 303 + - '32' + input_idx: 535 common: repetitions: 2 timeout: null @@ -33398,36 +34062,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 + p: 32 mode: deterministic - threads: 4 + threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 
memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 304 + - '32' + input_idx: 536 common: repetitions: 2 timeout: null @@ -33436,36 +34100,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 + p: 32 mode: deterministic - threads: 4 + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 305 + - '32' + input_idx: 537 common: repetitions: 2 timeout: null @@ -33474,36 +34138,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 - mode: deterministic - threads: 8 + p: 32 + mode: nondeterministic + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 306 + - '32' + input_idx: 538 common: repetitions: 2 timeout: null @@ -33512,36 +34176,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 - mode: deterministic - threads: 8 + p: 32 + mode: nondeterministic + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 307 + - '32' + input_idx: 539 common: repetitions: 2 timeout: null @@ -33550,36 +34214,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate 
+ traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 - mode: deterministic - threads: 8 + p: 32 + mode: nondeterministic + threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 308 + - '32' + input_idx: 540 common: repetitions: 2 timeout: null @@ -33588,36 +34252,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 + p: 32 + mode: nondeterministic + threads: 4 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 309 + - '32' + input_idx: 541 common: repetitions: 2 timeout: null @@ -33626,36 +34290,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + p: 32 + 
mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 310 + - '32' + input_idx: 542 common: repetitions: 2 timeout: null @@ -33664,36 +34328,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-10-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 + p: 32 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 311 + - '32' + input_idx: 543 common: repetitions: 2 timeout: null @@ -33702,36 +34366,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 path: 
/home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 + p: 32 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 312 + - '32' + input_idx: 544 common: repetitions: 2 timeout: null @@ -33740,36 +34404,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 + p: 32 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 313 + - '32' + input_idx: 545 common: repetitions: 2 timeout: null @@ -33778,36 +34442,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 + p: 32 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 314 + - '32' + input_idx: 546 common: repetitions: 2 timeout: null @@ -33816,36 +34480,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 + p: 32 mode: nondeterministic - threads: 4 - run_ahead: 5 + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 315 + - '32' + input_idx: 547 common: repetitions: 2 timeout: null @@ -33854,36 +34518,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 + p: 32 mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + threads: 8 + run_ahead: 10 + 
num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 316 + - '32' + input_idx: 548 common: repetitions: 2 timeout: null @@ -33892,36 +34556,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-10-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 64 + p: 32 mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 + threads: 8 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - '128' - - '64' + - '128' - '32' - input_idx: 317 + - '32' + input_idx: 549 common: repetitions: 2 timeout: null @@ -33930,36 +34594,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: 
./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 64 - mode: nondeterministic + mode: serial threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '32' + - '128' - '128' - '64' - '32' - input_idx: 318 + input_idx: 550 common: repetitions: 2 timeout: null @@ -33968,36 +34632,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 64 - mode: nondeterministic + mode: serial threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '32' + - '128' - '128' - '64' - '32' - input_idx: 319 + input_idx: 551 common: repetitions: 2 timeout: null @@ -34006,36 +34670,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 64 - mode: nondeterministic + mode: serial threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '32' + - '128' - '128' - '64' - '32' - input_idx: 320 + input_idx: 552 common: repetitions: 2 timeout: null @@ -34044,36 +34708,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-112-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-112-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 64 - mode: nondeterministic + mode: serial threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '32' + - '128' - '128' - '64' - '32' - input_idx: 321 + input_idx: 553 common: repetitions: 2 timeout: null @@ -34082,36 +34746,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 64 - mode: nondeterministic + mode: deterministic threads: 4 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 
memory_only: false args: - - '32' + - '128' - '128' - '64' - '32' - input_idx: 322 + input_idx: 554 common: repetitions: 2 timeout: null @@ -34120,36 +34784,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 64 - mode: nondeterministic + mode: deterministic threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '32' + - '128' - '128' - '64' - '32' - input_idx: 323 + input_idx: 555 common: repetitions: 2 timeout: null @@ -34158,36 +34822,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: 
./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 64 - mode: nondeterministic - threads: 8 + mode: deterministic + threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' + - '128' - '128' - '64' - '32' - input_idx: 324 + input_idx: 556 common: repetitions: 2 timeout: null @@ -34196,36 +34860,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 64 - mode: nondeterministic + mode: deterministic threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' + - '128' - '128' - '64' - '32' - input_idx: 325 + input_idx: 557 common: repetitions: 2 timeout: null @@ -34234,36 +34898,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 64 - mode: nondeterministic + mode: deterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '32' + - '128' - '128' - '64' - '32' - input_idx: 326 + input_idx: 558 common: repetitions: 2 timeout: null @@ -34272,36 +34936,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 64 - mode: nondeterministic + mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '32' + - '128' - '128' - '64' - '32' - input_idx: 327 + input_idx: 559 common: repetitions: 2 timeout: null @@ -34310,36 +34974,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 64 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: 
false args: - - '32' + - '128' - '128' - '64' - '32' - input_idx: 328 + input_idx: 560 common: repetitions: 2 timeout: null @@ -34348,36 +35012,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 64 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '32' + - '128' - '128' - '64' - '32' - input_idx: 329 + input_idx: 561 common: repetitions: 2 timeout: null @@ -34386,36 +35050,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul 
executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 64 mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 + threads: 4 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' + - '128' - '128' - '64' - '32' - input_idx: 330 + input_idx: 562 common: repetitions: 2 timeout: null @@ -34424,36 +35088,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 64 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' + - '128' - '128' - '64' - '32' - input_idx: 331 + input_idx: 563 common: repetitions: 2 timeout: null @@ -34462,36 +35126,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 64 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '32' + - '128' - '128' - '64' - '32' - input_idx: 332 + input_idx: 564 common: repetitions: 2 timeout: null @@ -34500,36 +35164,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-10-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 64 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '32' + - '128' - '128' - '64' - '32' - input_idx: 333 + input_idx: 565 common: repetitions: 2 timeout: null @@ -34538,36 +35202,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 64 mode: nondeterministic threads: 8 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 
memory_only: false args: - - '32' + - '128' - '128' - '64' - '32' - input_idx: 334 + input_idx: 566 common: repetitions: 2 timeout: null @@ -34576,36 +35240,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 64 mode: nondeterministic threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '32' + - '128' - '128' - '64' - '32' - input_idx: 335 + input_idx: 567 common: repetitions: 2 timeout: null @@ -34614,36 +35278,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: 
matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 128 - mode: serial - threads: 4 + p: 64 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' - '128' - '128' + - '64' - '32' - input_idx: 336 + input_idx: 568 common: repetitions: 2 timeout: null @@ -34652,36 +35316,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 + p: 64 + mode: nondeterministic + threads: 8 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' - '128' - '128' + - '64' - '32' - input_idx: 337 + input_idx: 569 common: repetitions: 2 timeout: null @@ -34690,36 +35354,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 + p: 64 + mode: nondeterministic + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '32' - '128' - '128' + - '64' - '32' - input_idx: 338 + input_idx: 570 common: repetitions: 2 timeout: null @@ -34728,36 +35392,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-10-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + p: 64 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '32' - '128' - '128' + - '64' - '32' - input_idx: 339 + input_idx: 571 common: repetitions: 2 timeout: null @@ -34766,36 +35430,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate 
- stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' 
- input_idx: 340 + input_idx: 572 common: repetitions: 2 timeout: null @@ -34804,36 +35468,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-serial-n-128-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 
128 n: 128 p: 128 mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 341 + input_idx: 573 common: repetitions: 2 timeout: null @@ -34842,36 +35506,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-serial-n-128-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 - mode: deterministic + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 342 + input_idx: 574 common: repetitions: 2 timeout: null @@ -34880,36 +35544,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-112-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-112-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 - mode: deterministic + mode: serial threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 343 + input_idx: 575 common: repetitions: 2 timeout: null @@ -34918,36 +35582,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 344 + input_idx: 576 common: repetitions: 2 timeout: null @@ -34956,36 +35620,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 345 + input_idx: 577 common: repetitions: 2 timeout: null @@ -34994,36 +35658,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 346 + input_idx: 578 common: repetitions: 2 timeout: null @@ -35032,36 +35696,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - 
traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 mode: deterministic - threads: 4 + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 347 + input_idx: 579 common: repetitions: 2 timeout: null @@ -35070,36 +35734,36 @@ benchmarks: results_dir: 
/home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 mode: deterministic threads: 8 run_ahead: 5 num_clusters: 28 
- cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 348 + input_idx: 580 common: repetitions: 2 timeout: null @@ -35108,36 +35772,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 349 + input_idx: 581 common: repetitions: 2 timeout: null @@ -35146,36 +35810,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 - mode: deterministic - threads: 8 + mode: nondeterministic + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 350 + input_idx: 582 common: repetitions: 2 timeout: null @@ -35184,36 +35848,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 - mode: deterministic - threads: 8 + mode: nondeterministic + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 351 + input_idx: 583 common: repetitions: 2 timeout: null @@ -35222,36 +35886,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + 
traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 - mode: deterministic - threads: 8 + mode: nondeterministic + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 352 + input_idx: 584 common: repetitions: 2 timeout: null @@ -35260,36 +35924,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 353 + input_idx: 585 common: repetitions: 2 timeout: null @@ -35298,36 +35962,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - 
stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + 
cores_per_cluster: 4 memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 354 + input_idx: 586 common: repetitions: 2 timeout: null @@ -35336,36 +36000,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-10-threads-4 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 28 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 355 + input_idx: 587 common: repetitions: 2 timeout: null @@ -35374,36 +36038,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: 
matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 356 + input_idx: 588 common: repetitions: 2 timeout: null @@ -35412,36 +36076,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 357 + input_idx: 589 common: repetitions: 2 timeout: null @@ -35450,36 +36114,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 358 + input_idx: 590 common: repetitions: 2 timeout: null @@ -35488,36 +36152,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 + threads: 8 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 359 + input_idx: 591 common: repetitions: 2 timeout: null @@ -35526,36 +36190,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + 
cores_per_cluster: 4 memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 360 + input_idx: 592 common: repetitions: 2 timeout: null @@ -35564,36 +36228,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-10-threads-8 values: dtype: 32 - m: 32 + m: 128 n: 128 p: 128 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '32' + - '128' - '128' - '128' - '32' - input_idx: 361 + input_idx: 593 common: repetitions: 2 timeout: null @@ -35602,36 +36266,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-112-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 values: - dtype: 32 - m: 32 - n: 128 - p: 128 - mode: nondeterministic + mode: serial threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' - - '128' - - '128' + - '512' - '32' - input_idx: 362 + input_idx: 594 common: repetitions: 2 timeout: null @@ -35640,36 +36304,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 values: - dtype: 32 - m: 32 - n: 128 - p: 128 - mode: nondeterministic + mode: serial threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: true + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' - - '128' - - '128' + - '512' - '32' - input_idx: 363 + input_idx: 595 common: repetitions: 2 timeout: null @@ -35678,36 +36342,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 values: - dtype: 32 - m: 32 - n: 128 - p: 128 - mode: nondeterministic + mode: serial threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' - - '128' - - '128' + - '512' - '32' - input_idx: 364 + input_idx: 596 common: repetitions: 2 timeout: null @@ -35716,36 +36380,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-112-p-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-112-p-512-run_ahead-5-threads-4 values: - dtype: 32 - m: 32 - n: 128 - p: 128 - mode: nondeterministic + mode: serial threads: 4 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' - - '128' - - '128' + - '512' - '32' - input_idx: 365 + input_idx: 597 common: repetitions: 2 timeout: null @@ -35754,36 +36418,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: 
!Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-112-p-512-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 values: - dtype: 32 - m: 32 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 + mode: deterministic + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 
memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' - - '128' - - '128' + - '512' - '32' - input_idx: 366 + input_idx: 598 common: repetitions: 2 timeout: null @@ -35792,36 +36456,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 values: - dtype: 32 - m: 32 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 + mode: deterministic + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' - - '128' - - '128' + - '512' - '32' - input_idx: 367 + input_idx: 599 common: repetitions: 2 timeout: null @@ -35830,36 +36494,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-512-run_ahead-5-threads-4 path: 
/home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-512-run_ahead-5-threads-4 values: - dtype: 32 - m: 32 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 + mode: deterministic + threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' - - '128' - - '128' + - '512' - '32' - input_idx: 368 + input_idx: 600 common: repetitions: 2 timeout: null @@ -35868,36 +36532,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-512-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null 
l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 values: - dtype: 32 - m: 32 - n: 128 - p: 128 - mode: nondeterministic + mode: deterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' - - '128' - - '128' + - '512' - '32' - input_idx: 369 + input_idx: 601 common: repetitions: 2 timeout: null @@ -35906,36 +36570,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 values: - dtype: 32 - m: 32 - n: 128 - p: 128 - mode: nondeterministic + mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' - - '128' - - '128' + - '512' - '32' - input_idx: 370 + input_idx: 602 common: repetitions: 2 timeout: null @@ -35944,36 +36608,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-512-run_ahead-5-threads-8 values: - dtype: 32 - m: 32 - n: 128 - p: 128 - mode: nondeterministic + mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - 
memory_only: true + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' - - '128' - - '128' + - '512' - '32' - input_idx: 371 + input_idx: 603 common: repetitions: 2 timeout: null @@ -35982,36 +36646,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-112-p-512-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 values: - dtype: 32 - m: 32 - n: 128 - p: 128 mode: nondeterministic - threads: 8 - run_ahead: 10 + threads: 4 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' - - '128' - - '128' + - '512' - '32' - input_idx: 372 + input_idx: 604 common: repetitions: 2 timeout: null @@ -36020,36 +36684,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul 
executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 values: - dtype: 32 - m: 32 - n: 128 - p: 128 mode: nondeterministic - threads: 8 - run_ahead: 10 + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' - - '128' - - '128' + - '512' - '32' - input_idx: 373 + input_idx: 605 common: repetitions: 2 timeout: null @@ -36058,36 +36722,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-512-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-512-run_ahead-5-threads-4 values: - dtype: 32 - m: 32 - n: 128 - p: 128 mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 + threads: 4 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' - - '128' - - '128' + - '512' - '32' - input_idx: 374 + input_idx: 606 common: repetitions: 2 timeout: null @@ -36096,36 +36760,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-512-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-4 values: - dtype: 32 - m: 32 - n: 128 - p: 128 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' - - '128' - - '128' + - '512' - '32' - input_idx: 375 + input_idx: 607 common: repetitions: 2 timeout: null @@ -36134,36 +36798,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-4 values: - dtype: 32 - m: 32 - n: 128 - p: 128 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + 
cores_per_cluster: 4 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' - - '128' - - '128' + - '512' - '32' - input_idx: 376 + input_idx: 608 common: repetitions: 2 timeout: null @@ -36172,36 +36836,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-512-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-512-run_ahead-10-threads-4 values: - dtype: 32 - m: 32 - n: 128 - p: 128 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 10 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: + - '512' - '32' - - '128' - - '128' + - '512' - '32' - input_idx: 377 + input_idx: 609 common: repetitions: 2 timeout: null @@ -36210,36 +36874,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-32-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-32-n-128-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-512-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: 
./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 values: - dtype: 32 - m: 64 - n: 32 - p: 32 - mode: serial - threads: 4 + mode: nondeterministic + threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: - - '64' - - '32' + - '512' - '32' + - '512' - '32' - input_idx: 378 + input_idx: 610 common: repetitions: 2 timeout: null @@ -36248,36 +36912,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 values: - dtype: 32 - m: 64 - n: 32 - p: 32 - mode: serial - threads: 4 + mode: nondeterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: - - '64' - - '32' + - '512' - '32' + - '512' - '32' - input_idx: 379 + input_idx: 611 common: repetitions: 2 timeout: null @@ -36286,36 +36950,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8/simulate + 
traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-512-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-512-run_ahead-5-threads-8 values: - dtype: 32 - m: 64 - n: 32 - p: 32 - mode: serial - threads: 4 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: - - '64' - - '32' + - '512' - '32' + - '512' - '32' - input_idx: 380 + input_idx: 612 common: repetitions: 2 timeout: null @@ -36324,36 +36988,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-512-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-8 values: - dtype: 32 - m: 64 - n: 32 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 + mode: nondeterministic + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: - - '64' - - '32' + - '512' - '32' + - '512' - '32' - input_idx: 381 + input_idx: 613 common: repetitions: 2 timeout: null @@ -36362,36 +37026,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: 
!Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-8 values: - dtype: 32 - m: 64 - n: 32 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + mode: nondeterministic + threads: 8 + run_ahead: 10 + 
num_clusters: 28 + cores_per_cluster: 4 memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: - - '64' - - '32' + - '512' - '32' + - '512' - '32' - input_idx: 382 + input_idx: 614 common: repetitions: 2 timeout: null @@ -36400,36 +37064,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-512-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-512-run_ahead-10-threads-8 values: - dtype: 32 - m: 64 - n: 32 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false + m: 512 + n: 32 + p: 512 + dtype: 32 args: - - '64' - - '32' + - '512' - '32' + - '512' - '32' - input_idx: 383 + input_idx: 615 common: repetitions: 2 timeout: null @@ -36438,36 +37102,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-112-p-512-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul 
rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 32 - m: 64 - n: 32 - p: 32 - mode: deterministic + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 384 + input_idx: 616 common: repetitions: 2 timeout: null @@ -36476,36 +37140,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 32 - m: 64 - n: 32 - p: 32 - mode: deterministic + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: true + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 385 + input_idx: 617 common: repetitions: 2 timeout: null @@ -36514,36 +37178,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 32 - m: 64 - n: 32 - p: 32 - mode: deterministic + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 386 + input_idx: 618 common: repetitions: 2 timeout: null @@ -36552,36 +37216,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-512-num_clusters-112-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-512-num_clusters-112-p-128-run_ahead-5-threads-4 values: - dtype: 32 - m: 64 - n: 32 - p: 32 - mode: deterministic + mode: serial threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 387 + input_idx: 619 common: repetitions: 2 timeout: null @@ -36590,36 +37254,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - 
stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-512-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 32 - m: 64 - n: 32 - p: 32 mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false + m: 128 + n: 512 + p: 128 + 
dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 388 + input_idx: 620 common: repetitions: 2 timeout: null @@ -36628,36 +37292,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 32 - m: 64 - n: 32 - p: 32 mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 389 + input_idx: 621 common: repetitions: 2 timeout: null @@ -36666,36 +37330,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-112-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: 
./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-112-p-128-run_ahead-5-threads-4 values: - dtype: 32 - m: 64 - n: 32 - p: 32 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 390 + input_idx: 622 common: repetitions: 2 timeout: null @@ -36704,36 +37368,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 values: - dtype: 32 - m: 64 - n: 32 - p: 32 mode: deterministic threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 391 + input_idx: 623 common: repetitions: 2 timeout: null @@ -36742,36 +37406,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 values: - dtype: 32 - m: 64 - n: 32 - p: 32 mode: deterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 392 + input_idx: 624 common: repetitions: 2 timeout: null @@ -36780,36 +37444,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-112-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-112-p-128-run_ahead-5-threads-8 values: - dtype: 32 - m: 64 - n: 32 - p: 32 mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 393 + input_idx: 625 common: repetitions: 2 timeout: null @@ -36818,36 +37482,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - 
stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-112-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 32 - m: 64 - n: 32 - p: 32 - mode: deterministic - threads: 8 + mode: nondeterministic + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 
cores_per_cluster: 1 memory_only: false + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 394 + input_idx: 626 common: repetitions: 2 timeout: null @@ -36856,36 +37520,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 values: - dtype: 32 - m: 64 - n: 32 - p: 32 - mode: deterministic - threads: 8 + mode: nondeterministic + threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 395 + input_idx: 627 common: repetitions: 2 timeout: null @@ -36894,36 +37558,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-112-p-128-run_ahead-5-threads-4 path: 
/home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-112-p-128-run_ahead-5-threads-4 values: - dtype: 32 - m: 64 - n: 32 - p: 32 mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 396 + input_idx: 628 common: repetitions: 2 timeout: null @@ -36932,36 +37596,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-112-p-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 
1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-4 values: - dtype: 32 - m: 64 - n: 32 - p: 32 mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 397 + input_idx: 629 common: repetitions: 2 timeout: null @@ -36970,36 +37634,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-4 values: - dtype: 32 - m: 64 - n: 32 - p: 32 mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 398 + input_idx: 630 common: repetitions: 2 timeout: null @@ -37008,36 +37672,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-112-p-128-run_ahead-10-threads-4 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-112-p-128-run_ahead-10-threads-4 values: - dtype: 32 - m: 64 - n: 32 - p: 32 mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + run_ahead: 10 + 
num_clusters: 112 + cores_per_cluster: 1 + memory_only: false + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 399 + input_idx: 631 common: repetitions: 2 timeout: null @@ -37046,36 +37710,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-112-p-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + 
results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 values: - dtype: 32 - m: 64 - n: 32 - p: 32 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 400 + input_idx: 632 common: repetitions: 2 timeout: null @@ -37084,36 +37748,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: 
./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 values: - dtype: 32 - m: 64 - n: 32 - p: 32 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 401 + input_idx: 633 common: repetitions: 2 timeout: null @@ -37122,36 +37786,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 
1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-112-p-128-run_ahead-5-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-112-p-128-run_ahead-5-threads-8 values: - dtype: 32 - m: 64 - n: 32 - p: 32 mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 + threads: 8 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 402 + input_idx: 634 common: repetitions: 2 timeout: null @@ -37160,36 +37824,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-112-p-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-8 values: - dtype: 32 - m: 64 - n: 32 - p: 32 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 403 + input_idx: 635 common: repetitions: 2 timeout: null @@ -37198,36 +37862,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 + results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-8 values: - dtype: 32 - m: 64 - n: 32 - p: 32 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 
memory_only: false + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 404 + input_idx: 636 common: repetitions: 2 timeout: null @@ -37236,36 +37900,36 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-4-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - name: simple_matrixmul benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 + uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-112-p-128-run_ahead-10-threads-8 path: /home/roman/dev/box/test-apps/simple_matrixmul rel_path: ./simple_matrixmul executable: matrixmul executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 + results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-112-p-128-run_ahead-10-threads-8 values: - dtype: 32 - m: 64 - n: 32 - p: 32 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false + m: 128 + n: 512 + p: 128 + dtype: 32 args: - - '64' - - '32' - - '32' + - '128' + - '512' + - '128' - '32' - input_idx: 405 + input_idx: 637 common: repetitions: 2 timeout: null @@ -37274,36 +37938,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-112-p-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + matrixmul: + - name: matrixmul + benchmark_idx: 2 + uid: 
matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 32 - mode: nondeterministic + rows: 32 + mode: serial threads: 4 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '64' - - '32' - '32' - '32' - input_idx: 406 + input_idx: 0 common: repetitions: 2 timeout: null @@ -37312,36 +37973,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - 
name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 32 - mode: nondeterministic + rows: 32 + mode: serial threads: 4 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '64' - - '32' - '32' - '32' - input_idx: 407 + input_idx: 1 common: repetitions: 2 timeout: null @@ -37350,36 +38007,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 32 - mode: nondeterministic - threads: 8 + rows: 32 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '64' - - '32' - '32' - '32' - input_idx: 408 + input_idx: 2 common: repetitions: 2 timeout: null @@ -37388,36 +38041,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-112-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-112-rows-32-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 32 - mode: nondeterministic - threads: 8 + rows: 32 + mode: serial + threads: 4 run_ahead: 5 - 
num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '64' - - '32' - '32' - '32' - input_idx: 409 + input_idx: 3 common: repetitions: 2 timeout: null @@ -37426,36 +38075,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-112-rows-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 32 - mode: nondeterministic - threads: 8 + rows: 32 + mode: deterministic + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '64' - '32' - '32' - - '32' - input_idx: 410 + input_idx: 4 common: repetitions: 2 timeout: null @@ -37464,36 +38109,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 + path: 
/home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 32 - mode: nondeterministic - threads: 8 + rows: 32 + mode: deterministic + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '64' - - '32' - '32' - '32' - input_idx: 411 + input_idx: 5 common: repetitions: 2 timeout: null @@ -37502,36 +38143,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-32-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 32 - mode: nondeterministic - threads: 8 + rows: 32 + mode: deterministic + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '64' - - '32' - '32' - '32' - input_idx: 412 + input_idx: 6 common: repetitions: 2 timeout: null @@ -37540,36 +38177,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 32 - mode: nondeterministic + rows: 32 + mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '64' - '32' - '32' - - '32' - input_idx: 413 + input_idx: 7 common: repetitions: 2 timeout: null @@ -37578,36 +38211,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 32 - mode: nondeterministic + rows: 32 + mode: deterministic threads: 8 - 
run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '64' - '32' - '32' - - '32' - input_idx: 414 + input_idx: 8 common: repetitions: 2 timeout: null @@ -37616,36 +38245,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 + 
executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-32-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 32 - mode: nondeterministic + rows: 32 + mode: deterministic threads: 8 - run_ahead: 10 - num_clusters: 28 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '64' - '32' - '32' - - '32' - input_idx: 415 + input_idx: 9 common: repetitions: 2 timeout: null @@ -37654,36 +38279,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: 
matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 32 + rows: 32 mode: nondeterministic - threads: 8 - run_ahead: 10 + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '64' - - '32' - '32' - '32' - input_idx: 416 + input_idx: 10 common: repetitions: 2 timeout: null @@ -37692,36 +38313,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace 
parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 32 + rows: 32 mode: nondeterministic - threads: 8 - run_ahead: 10 + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '64' - '32' - '32' - - '32' - input_idx: 417 + input_idx: 11 common: repetitions: 2 timeout: null @@ -37730,36 +38347,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace 
+ stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-32-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-32-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 32 + rows: 32 mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 + threads: 4 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '64' - '32' - '32' - - '32' - input_idx: 418 + input_idx: 12 common: repetitions: 2 timeout: null @@ -37768,36 +38381,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-32-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 32 + rows: 32 mode: nondeterministic - threads: 8 + threads: 4 
run_ahead: 10 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '64' - - '32' - '32' - '32' - input_idx: 419 + input_idx: 13 common: repetitions: 2 timeout: null @@ -37806,36 +38415,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + 
executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: serial + rows: 32 + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '64' - '32' - - '64' - '32' - input_idx: 420 + input_idx: 14 common: repetitions: 2 timeout: null @@ -37844,36 +38449,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-32-run_ahead-10-threads-4 + path: 
/home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-32-run_ahead-10-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: serial + rows: 32 + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 28 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '64' - '32' - - '64' - '32' - input_idx: 421 + input_idx: 15 common: repetitions: 2 timeout: null @@ -37882,36 +38483,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-32-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: serial - threads: 4 + rows: 32 + mode: nondeterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '64' - '32' - - '64' - '32' - input_idx: 422 + input_idx: 16 common: repetitions: 2 timeout: null @@ -37920,36 +38517,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: serial - threads: 4 + rows: 32 + mode: nondeterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '64' - '32' - - '64' - '32' - input_idx: 423 + input_idx: 17 common: repetitions: 2 timeout: null @@ -37958,36 +38551,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-32-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-32-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: serial - threads: 4 + rows: 32 + mode: nondeterministic + threads: 8 run_ahead: 5 - 
num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '64' - '32' - - '64' - '32' - input_idx: 424 + input_idx: 18 common: repetitions: 2 timeout: null @@ -37996,36 +38585,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-32-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 + executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 + rows: 32 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '64' - '32' - - '64' - '32' - input_idx: 425 + input_idx: 19 common: repetitions: 2 timeout: null @@ -38034,36 +38619,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-8 + path: 
/home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 + rows: 32 + mode: nondeterministic + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '64' - '32' - - '64' - '32' - input_idx: 426 + input_idx: 20 common: repetitions: 2 timeout: null @@ -38072,36 +38653,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-32-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-32-run_ahead-10-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 + rows: 32 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '64' - '32' - - '64' - '32' - input_idx: 427 + input_idx: 21 common: repetitions: 2 timeout: null @@ -38110,36 +38687,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-32-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: deterministic + rows: 64 + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - '64' - '32' - - '64' - - '32' - input_idx: 428 + input_idx: 22 common: repetitions: 2 timeout: null @@ -38148,36 +38721,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: deterministic + rows: 64 + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - 
cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: true args: - '64' - '32' - - '64' - - '32' - input_idx: 429 + input_idx: 23 common: repetitions: 2 timeout: null @@ -38186,36 +38755,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + 
results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: deterministic + rows: 64 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - '64' - '32' - - '64' - - '32' - input_idx: 430 + input_idx: 24 common: repetitions: 2 timeout: null @@ -38224,36 +38789,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-112-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-112-rows-64-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: deterministic + rows: 64 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - '64' - '32' - - '64' - - '32' - input_idx: 431 + input_idx: 25 common: repetitions: 2 timeout: null @@ -38262,26 +38823,24 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-112-rows-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - path: 
/home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 + rows: 64 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 @@ -38289,9 +38848,7 @@ benchmarks: args: - '64' - '32' - - '64' - - '32' - input_idx: 432 + input_idx: 26 common: repetitions: 2 timeout: null @@ -38300,36 +38857,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 + rows: 64 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - '64' - '32' - - '64' - - '32' - input_idx: 433 + input_idx: 27 common: repetitions: 2 timeout: null @@ -38338,36 +38891,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-64-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 + rows: 64 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - '64' - '32' - - '64' - - '32' - input_idx: 434 + input_idx: 28 common: repetitions: 2 timeout: null @@ -38376,36 +38925,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: 
!Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 64 + rows: 64 mode: deterministic threads: 8 run_ahead: 5 
num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - '64' - '32' - - '64' - - '32' - input_idx: 435 + input_idx: 29 common: repetitions: 2 timeout: null @@ -38414,36 +38959,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 + executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 64 + rows: 64 mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - '64' - '32' - - '64' - - '32' - input_idx: 436 + input_idx: 30 common: repetitions: 2 timeout: null @@ -38452,36 +38993,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: 
./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-64-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 64 + rows: 64 mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - '64' - '32' - - '64' - - '32' - input_idx: 437 + input_idx: 31 common: repetitions: 2 timeout: null @@ -38490,24 +39027,22 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 + rows: 64 mode: nondeterministic threads: 4 run_ahead: 5 @@ -38517,9 +39052,7 @@ benchmarks: args: - '64' - '32' - - '64' - - '32' - input_idx: 438 + input_idx: 32 common: repetitions: 2 timeout: null @@ -38528,36 +39061,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-4/simulate + 
traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 + rows: 64 mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - '64' - '32' - - '64' - - '32' - input_idx: 439 + input_idx: 33 common: repetitions: 2 timeout: null @@ -38566,36 +39095,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-64-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-64-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 + rows: 64 mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - '64' - '32' - - '64' - - '32' - input_idx: 440 + input_idx: 34 common: repetitions: 2 timeout: null 
@@ -38604,36 +39129,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-64-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 + rows: 64 mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - '64' - '32' - - '64' - - '32' - input_idx: 441 + input_idx: 35 common: repetitions: 2 timeout: null @@ -38642,36 +39163,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul 
- executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 + rows: 64 mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + run_ahead: 10 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - '64' - '32' - - '64' - - '32' - input_idx: 442 + input_idx: 36 common: repetitions: 2 timeout: null @@ -38680,36 +39197,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-64-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-64-run_ahead-10-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 + rows: 64 mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - '64' - '32' - - '64' - - '32' - input_idx: 443 + input_idx: 37 common: repetitions: 2 timeout: null @@ -38718,36 +39231,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-64-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 64 + rows: 64 mode: nondeterministic - threads: 4 - run_ahead: 10 + threads: 8 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - '64' - '32' - - '64' - - '32' - input_idx: 444 + input_idx: 38 common: repetitions: 2 timeout: null @@ -38756,36 +39265,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 64 + rows: 64 mode: nondeterministic - threads: 4 - run_ahead: 10 
+ threads: 8 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - '64' - '32' - - '64' - - '32' - input_idx: 445 + input_idx: 39 common: repetitions: 2 timeout: null @@ -38794,36 +39299,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-64-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-64-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 64 + rows: 64 mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 + threads: 8 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - '64' - '32' - - '64' - - '32' - input_idx: 446 + input_idx: 40 common: repetitions: 2 timeout: null @@ -38832,36 +39333,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-64-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - 
rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 - values: + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-8 + values: dtype: 32 - m: 64 - n: 32 - p: 64 + rows: 64 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - '64' - '32' - - '64' - - '32' - input_idx: 447 + input_idx: 41 common: repetitions: 2 timeout: null @@ -38870,36 +39367,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + 
accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 64 + rows: 64 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - '64' - '32' - - '64' - - '32' - input_idx: 448 + input_idx: 42 common: repetitions: 2 timeout: null @@ -38908,36 +39401,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-64-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-64-run_ahead-10-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 64 + rows: 64 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - '64' - '32' - - '64' - - '32' - input_idx: 449 + input_idx: 43 common: repetitions: 2 timeout: null @@ -38946,36 +39435,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-64-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 + 
rows: 128 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '64' - - '32' - - '64' + - '128' - '32' - input_idx: 450 + input_idx: 44 common: repetitions: 2 timeout: null @@ -38984,36 +39469,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + 
executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 + rows: 128 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '64' - - '32' - - '64' + - '128' - '32' - input_idx: 451 + input_idx: 45 common: repetitions: 2 timeout: null @@ -39022,36 +39503,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: 
./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 + rows: 128 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '64' - - '32' - - '64' + - '128' - '32' - input_idx: 452 + input_idx: 46 common: repetitions: 2 timeout: null @@ -39060,36 +39537,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-112-rows-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-112-rows-128-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 + rows: 128 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '64' - - '32' - - '64' + - '128' - '32' - input_idx: 453 + input_idx: 47 common: repetitions: 2 timeout: null @@ -39098,36 +39571,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-112-rows-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 + rows: 128 + mode: deterministic + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '64' - - '32' - - '64' + - '128' - '32' - input_idx: 454 + input_idx: 48 common: repetitions: 2 timeout: null @@ -39136,36 +39605,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 + rows: 128 + mode: 
deterministic + threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '64' - - '32' - - '64' + - '128' - '32' - input_idx: 455 + input_idx: 49 common: repetitions: 2 timeout: null @@ -39174,36 +39639,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-128-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 + rows: 128 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '64' - - '32' - - '64' + - '128' - '32' - input_idx: 456 + input_idx: 50 common: repetitions: 2 timeout: null @@ -39212,36 +39673,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul 
- rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: nondeterministic + rows: 128 + mode: deterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '64' - - '32' - - '64' + - '128' - '32' - input_idx: 457 + input_idx: 51 common: repetitions: 2 timeout: null @@ -39250,36 +39707,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + 
accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: nondeterministic + rows: 128 + mode: deterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '64' - - '32' - - '64' + - '128' - '32' - input_idx: 458 + input_idx: 52 common: repetitions: 2 timeout: null @@ -39288,36 +39741,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-128-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 64 - mode: nondeterministic + rows: 128 + mode: deterministic threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '64' - - '32' - - '64' + - '128' - '32' - input_idx: 459 + input_idx: 53 common: repetitions: 2 timeout: null @@ -39326,36 +39775,32 @@ benchmarks: 
results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 
values: dtype: 32 - m: 64 - n: 32 - p: 64 + rows: 128 mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 + threads: 4 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '64' - - '32' - - '64' + - '128' - '32' - input_idx: 460 + input_idx: 54 common: repetitions: 2 timeout: null @@ -39364,36 +39809,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 64 + rows: 128 mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + threads: 4 + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '64' - - '32' - - '64' + - '128' - '32' - input_idx: 461 + input_idx: 55 common: repetitions: 2 timeout: null @@ -39402,36 +39843,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-64/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: 
/home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-128-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-128-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: serial + rows: 128 + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '64' - - '32' - '128' - '32' - input_idx: 462 + input_idx: 56 common: repetitions: 2 timeout: null @@ -39440,36 +39877,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-128-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + 
accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: serial + rows: 128 + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '64' - - '32' - '128' - '32' - input_idx: 463 + input_idx: 57 common: repetitions: 2 timeout: null @@ -39478,36 +39911,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: serial + rows: 128 + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '64' - - '32' - '128' - '32' - input_idx: 464 + input_idx: 58 common: repetitions: 2 timeout: null @@ -39516,36 +39945,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-128-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-128-run_ahead-10-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: serial + rows: 128 + mode: 
nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '64' - - '32' - '128' - '32' - input_idx: 465 + input_idx: 59 common: repetitions: 2 timeout: null @@ -39554,36 +39979,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-128-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: serial - threads: 4 + rows: 128 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '64' - - '32' - '128' - '32' - input_idx: 466 + input_idx: 60 common: repetitions: 2 timeout: null @@ -39592,36 +40013,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-32-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + 
benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: serial - threads: 4 + rows: 128 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - - '64' - - '32' - '128' - '32' - input_idx: 467 + input_idx: 61 common: repetitions: 2 timeout: null @@ -39630,36 +40047,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-32-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-128-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-128-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: deterministic - threads: 4 + rows: 128 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '64' - - '32' - '128' - '32' - input_idx: 468 + input_idx: 62 common: repetitions: 2 timeout: null @@ -39668,36 +40081,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-128-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 + rows: 128 + mode: nondeterministic + threads: 8 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '64' - - '32' - '128' - '32' - input_idx: 469 + input_idx: 63 common: repetitions: 2 timeout: null @@ -39706,36 +40115,32 @@ benchmarks: results_dir: /home/roman/dev/box/results 
target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 128 - 
mode: deterministic - threads: 4 - run_ahead: 5 + rows: 128 + mode: nondeterministic + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - - '64' - - '32' - '128' - '32' - input_idx: 470 + input_idx: 64 common: repetitions: 2 timeout: null @@ -39744,36 +40149,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-128-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-128-run_ahead-10-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + rows: 128 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - - '64' - - '32' - '128' - '32' - input_idx: 471 + input_idx: 65 common: repetitions: 2 timeout: null @@ -39782,36 +40183,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-128-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 - path: 
/home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: deterministic + rows: 256 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 472 + input_idx: 66 common: repetitions: 2 timeout: null @@ -39820,36 +40217,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + 
accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: deterministic + rows: 256 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: true args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 473 + input_idx: 67 common: repetitions: 2 timeout: null @@ -39858,36 +40251,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: deterministic - threads: 8 + rows: 256 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 474 + input_idx: 68 common: repetitions: 2 timeout: null @@ -39896,36 +40285,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - 
stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-112-rows-256-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-112-rows-256-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: deterministic - threads: 8 + rows: 256 + mode: serial + threads: 4 
run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 475 + input_idx: 69 common: repetitions: 2 timeout: null @@ -39934,36 +40319,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-112-rows-256-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 + rows: 256 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 476 + input_idx: 70 common: repetitions: 2 timeout: null @@ -39972,36 +40353,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: 
matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 + rows: 256 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 477 + input_idx: 71 common: repetitions: 2 timeout: null @@ -40010,36 +40387,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-256-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-256-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 + rows: 256 mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 478 + input_idx: 72 common: repetitions: 2 timeout: null @@ -40048,36 +40421,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-256-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 128 + rows: 256 mode: deterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 479 + input_idx: 73 common: repetitions: 2 timeout: null @@ -40086,36 +40455,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: 
!Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: nondeterministic - threads: 4 + 
rows: 256 + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 480 + input_idx: 74 common: repetitions: 2 timeout: null @@ -40124,36 +40489,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-256-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-256-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: nondeterministic - threads: 4 + rows: 256 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 481 + input_idx: 75 common: repetitions: 2 timeout: null @@ -40162,36 +40523,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-256-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: 
/home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 482 + input_idx: 76 common: repetitions: 2 timeout: null @@ -40200,36 +40557,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 483 + input_idx: 77 common: repetitions: 2 timeout: null @@ -40238,36 +40591,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-256-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-256-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 484 + input_idx: 78 common: repetitions: 2 timeout: null 
@@ -40276,36 +40625,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-256-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 + rows: 256 mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 485 + input_idx: 79 common: repetitions: 2 timeout: null @@ -40314,36 +40659,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul 
executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 486 + input_idx: 80 common: repetitions: 2 timeout: null @@ -40352,36 +40693,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-256-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-256-run_ahead-10-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 + rows: 256 mode: nondeterministic threads: 4 run_ahead: 10 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 487 + input_idx: 81 common: repetitions: 2 timeout: null @@ -40390,36 +40727,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-256-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 128 + rows: 256 mode: nondeterministic - threads: 4 - run_ahead: 10 + threads: 8 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 488 + input_idx: 82 common: repetitions: 2 timeout: null @@ -40428,36 +40761,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 128 + rows: 256 mode: nondeterministic - threads: 4 - 
run_ahead: 10 + threads: 8 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 489 + input_idx: 83 common: repetitions: 2 timeout: null @@ -40466,36 +40795,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-256-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-256-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 128 + rows: 256 mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 + threads: 8 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 490 + input_idx: 84 common: repetitions: 2 timeout: null @@ -40504,36 +40829,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-256-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: 
./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 128 + rows: 256 mode: nondeterministic - threads: 4 + threads: 8 run_ahead: 10 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 491 + input_idx: 85 common: repetitions: 2 timeout: null @@ -40542,36 +40863,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + 
accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 128 + rows: 256 mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 492 + input_idx: 86 common: repetitions: 2 timeout: null @@ -40580,36 +40897,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-256-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-256-run_ahead-10-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 128 + rows: 256 mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 28 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '64' - - '32' - - '128' + - '256' - '32' - input_idx: 493 + input_idx: 87 common: repetitions: 2 timeout: null @@ -40618,36 +40931,32 @@ benchmarks: results_dir: 
/home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-256-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 
32 - p: 128 - mode: nondeterministic - threads: 8 + rows: 512 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - - '64' - - '32' - - '128' + - '512' - '32' - input_idx: 494 + input_idx: 88 common: repetitions: 2 timeout: null @@ -40656,36 +40965,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 + rows: 512 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: true args: - - '64' - - '32' - - '128' + - '512' - '32' - input_idx: 495 + input_idx: 89 common: repetitions: 2 timeout: null @@ -40694,36 +40999,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - 
name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 + rows: 512 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '64' - - '32' - - '128' + - '512' - '32' - input_idx: 496 + input_idx: 90 common: repetitions: 2 timeout: null @@ -40732,36 +41033,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-112-rows-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-112-rows-512-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 + rows: 512 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '64' - - '32' - - '128' + - '512' - '32' - input_idx: 497 + input_idx: 91 common: repetitions: 2 timeout: null @@ -40770,36 +41067,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-112-rows-512-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 + rows: 512 + mode: deterministic + threads: 4 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '64' - - '32' - - '128' + - '512' - '32' - input_idx: 498 + input_idx: 92 common: repetitions: 2 timeout: null @@ -40808,36 +41101,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: 
nondeterministic - threads: 8 - run_ahead: 10 + rows: 512 + mode: deterministic + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '64' - - '32' - - '128' + - '512' - '32' - input_idx: 499 + input_idx: 93 common: repetitions: 2 timeout: null @@ -40846,36 +41135,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-512-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 + rows: 512 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '64' - - '32' - - '128' + - '512' - '32' - input_idx: 500 + input_idx: 94 common: repetitions: 2 timeout: null @@ -40884,36 +41169,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-512-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 - path: 
/home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: nondeterministic + rows: 512 + mode: deterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '64' - - '32' - - '128' + - '512' - '32' - input_idx: 501 + input_idx: 95 common: repetitions: 2 timeout: null @@ -40922,36 +41203,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: nondeterministic + rows: 512 + mode: deterministic threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '64' - - '32' - - '128' + - '512' - '32' - input_idx: 502 + input_idx: 96 common: repetitions: 2 timeout: null @@ -40960,36 +41237,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-8 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-512-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 32 - p: 128 - mode: nondeterministic + rows: 512 + mode: deterministic threads: 8 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '64' - - '32' - - '128' + - '512' - '32' - input_idx: 
503 + input_idx: 97 common: repetitions: 2 timeout: null @@ -40998,36 +41271,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-32-p-128/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-112-rows-512-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 64 - p: 32 - mode: serial + rows: 512 + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '64' - - '64' - - '32' + - '512' - '32' - input_idx: 504 + input_idx: 98 common: repetitions: 2 timeout: null @@ -41036,36 +41305,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 64 - p: 32 - mode: serial + rows: 512 + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '64' - - '64' - - '32' + - '512' - '32' - input_idx: 505 + input_idx: 99 common: repetitions: 2 timeout: null @@ -41074,36 +41339,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: 
/home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-512-run_ahead-5-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-512-run_ahead-5-threads-4 values: dtype: 32 - m: 64 - n: 64 - p: 32 - mode: serial + rows: 512 + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '64' - - '64' - - '32' + - '512' - '32' - input_idx: 506 + input_idx: 100 common: repetitions: 2 timeout: null @@ -41112,36 +41373,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-512-run_ahead-5-threads-4/simulate + traces_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-4 values: dtype: 32 - m: 64 - n: 64 - p: 32 - mode: serial + rows: 512 + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '64' - - '64' - - '32' + - '512' - '32' - input_idx: 507 + input_idx: 101 common: repetitions: 2 timeout: null @@ -41150,36 +41407,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-4 values: dtype: 32 - m: 64 - n: 64 - p: 32 - mode: serial + rows: 512 + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + run_ahead: 10 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - - '64' - - '64' - - '32' + - '512' - '32' - input_idx: 508 + input_idx: 102 
common: repetitions: 2 timeout: null @@ -41188,36 +41441,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-512-run_ahead-10-threads-4 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-512-run_ahead-10-threads-4 values: dtype: 32 - m: 64 - n: 64 - p: 32 - mode: serial + rows: 512 + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - - '64' - - '64' - - '32' + - '512' - '32' - input_idx: 509 + input_idx: 103 common: repetitions: 2 timeout: null @@ -41226,36 +41475,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-512-run_ahead-10-threads-4/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul 
executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 64 - p: 32 - mode: deterministic - threads: 4 + rows: 512 + mode: nondeterministic + threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - - '64' - - '64' - - '32' + - '512' - '32' - input_idx: 510 + input_idx: 104 common: repetitions: 2 timeout: null @@ -41264,36 +41509,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 64 - p: 32 - mode: deterministic - threads: 4 + rows: 512 + mode: nondeterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - - '64' - - '64' - - '32' + - '512' - '32' - input_idx: 511 + input_idx: 105 common: repetitions: 2 timeout: null @@ -41302,36 +41543,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-512-run_ahead-5-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-512-run_ahead-5-threads-8 values: dtype: 32 - m: 64 - n: 64 - p: 32 - mode: deterministic - threads: 4 + rows: 512 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - - '64' - - '64' - - '32' + - '512' - '32' - input_idx: 512 + input_idx: 106 common: repetitions: 2 timeout: null @@ -41340,36 +41577,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-512-run_ahead-5-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-8 values: dtype: 32 - m: 64 - n: 64 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 + rows: 512 + 
mode: nondeterministic + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - - '64' - - '64' - - '32' + - '512' - '32' - input_idx: 513 + input_idx: 107 common: repetitions: 2 timeout: null @@ -41378,40515 +41611,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-8 values: dtype: 32 - m: 64 - n: 64 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 514 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - 
memory_only: true - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 515 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 516 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 517 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 518 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 519 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - 
stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 520 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 521 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: 
nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 522 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 523 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 524 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 525 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 526 - common: - repetitions: 2 - timeout: null - concurrency: 1 - 
enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 527 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - 
l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 528 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 529 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 530 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 531 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 532 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 
64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 533 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 534 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - 
traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 535 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul 
- executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 536 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 537 - common: - repetitions: 2 - timeout: null - concurrency: 1 - 
enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 538 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - 
l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 539 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 540 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 541 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 542 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 543 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 
64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 544 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '32' - - '32' - input_idx: 545 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-8/simulate - 
traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 546 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 547 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 548 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 549 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 550 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - 
num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 551 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 552 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 553 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 554 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 555 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: 
/home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 556 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - 
benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 557 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 
64 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 558 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 559 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 560 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 561 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 562 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: 
/home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 563 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - 
benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 564 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - 
dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 565 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 566 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 567 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 568 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - 
p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 569 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 570 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - 
traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 571 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: 
matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 572 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 573 - common: - repetitions: 2 - timeout: null - 
concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 574 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - 
parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 575 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 576 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 577 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: 
!Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 578 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 579 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 
64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 580 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 581 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 582 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 583 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 584 - common: - repetitions: 2 - timeout: null - concurrency: 1 - 
enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 585 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - 
l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 586 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '64' - - '32' - input_idx: 587 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 588 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 589 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 590 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - 
num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 591 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 592 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-64-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 593 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-64-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 594 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 595 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - 
stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 596 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 597 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - 
mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 598 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 599 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 600 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 601 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 602 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - 
results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 603 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: 
simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 604 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 - 
values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 605 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 606 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 607 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 608 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - 
n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 609 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 610 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 611 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 612 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - 
m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 613 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 614 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 615 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 616 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - 
m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 617 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 618 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 619 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 620 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - 
n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 621 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 622 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 623 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 624 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - 
m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 625 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 626 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 627 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 628 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - 
m: 64 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '64' - - '128' - - '32' - input_idx: 629 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 630 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 631 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 632 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 633 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: 
Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 634 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 635 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: deterministic - threads: 
4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 636 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 637 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 638 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 639 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 640 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - 
results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 641 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: 
simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 642 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - 
values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 643 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 644 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 645 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 646 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - 
mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 647 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 648 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 649 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 650 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 651 - common: - repetitions: 2 - timeout: null - concurrency: 1 
- enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 652 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: 
null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 653 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 654 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 655 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 656 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - 
uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 657 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-4 - values: - dtype: 32 
- m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 658 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 659 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 660 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 661 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - 
n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 662 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 663 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 664 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 665 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - 
n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 666 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 667 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 668 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 669 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 
64 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 670 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '32' - - '32' - input_idx: 671 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 672 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 673 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - 
num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 674 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 675 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 676 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 677 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 678 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 679 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 680 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - 
mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 681 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 682 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 683 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 684 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 685 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - 
results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 686 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: 
simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 687 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 - 
values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 688 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 689 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 690 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 691 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - 
n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 692 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 693 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 694 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 695 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - 
n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 696 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 697 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 698 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 699 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 
64 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 700 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 701 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 702 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 703 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - 
n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 704 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 705 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 706 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 707 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - 
n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 708 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 709 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 710 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 711 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 
64 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 712 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '64' - - '32' - input_idx: 713 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 714 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 715 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: serial - threads: 4 - 
run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 716 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 717 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 718 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-serial-n-128-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 719 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-serial-n-128-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 720 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: 
/home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 721 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: 
simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 722 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 723 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 724 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: 
!Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 725 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 726 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - 
p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 727 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 728 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - 
traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 729 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: 
matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 730 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 731 - common: - repetitions: 2 - timeout: null - 
concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 732 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 733 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 734 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 735 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate 
- target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 736 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 
1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 737 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - 
dtype: 32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 738 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 739 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 740 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 741 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-4 - values: - dtype: 
32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 742 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 743 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 744 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 745 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - 
m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 746 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 747 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 748 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 749 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - dtype: 32 
- m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 750 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 751 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 752 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 753 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-8 - values: - dtype: 
32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 754 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - m: 64 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '128' - - '128' - - '32' - input_idx: 755 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-64-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-64-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 756 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 757 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - 
num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 758 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 759 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 760 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 761 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 762 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 763 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 764 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - 
mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 765 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 766 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 767 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 768 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 769 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - 
results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 770 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: 
simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 771 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 - 
values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 772 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 773 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 774 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 775 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - 
n: 32 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 776 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 777 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 778 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 779 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 
- n: 32 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 780 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 781 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 782 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 783 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 
128 - n: 32 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 784 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 785 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 786 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 787 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - 
n: 32 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 788 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 789 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 790 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 791 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 
- n: 32 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 792 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 793 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 794 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 795 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 
128 - n: 32 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 796 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '32' - - '32' - input_idx: 797 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 798 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 799 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - 
num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 800 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 801 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 802 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 803 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 804 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 805 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 806 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - 
mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 807 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 808 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 809 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 810 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 811 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - 
results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 812 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: 
simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 813 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 - 
values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 814 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 815 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 816 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 817 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - 
n: 32 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 818 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 819 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 820 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 821 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 
- n: 32 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 822 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 823 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 824 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 825 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 
128 - n: 32 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 826 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 827 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 828 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 829 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - 
n: 32 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 830 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 831 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 832 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 833 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 
- n: 32 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 834 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 835 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 836 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 837 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 
128 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 838 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '64' - - '32' - input_idx: 839 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 840 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 841 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: serial - threads: 4 - 
run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 842 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 843 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 844 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-32-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 845 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-32-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 846 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: 
/home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 847 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: 
simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 848 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 849 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 850 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: 
!Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 851 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 852 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - 
p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 853 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 854 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - 
traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 855 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: 
matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 856 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 857 - common: - repetitions: 2 - timeout: null - 
concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 858 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 859 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 860 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 861 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate 
- target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 862 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 
1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 863 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - 
dtype: 32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 864 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 865 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 866 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 867 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-4 - values: - dtype: 
32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 868 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 869 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 870 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 871 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - 
m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 872 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 873 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 874 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 875 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - dtype: 32 
- m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 876 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 877 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 878 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 879 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-8 - values: - dtype: 
32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 880 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 32 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - - '128' - - '32' - input_idx: 881 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-32-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 882 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 883 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - 
num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 884 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 885 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 886 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 887 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 888 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 889 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 890 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - 
mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 891 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 892 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 893 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 894 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 895 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - 
results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 896 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: 
simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 897 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 - 
values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 898 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 899 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 900 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 901 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - 
n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 902 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 903 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 904 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 905 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 
- n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 906 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 907 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 908 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 909 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 
128 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 910 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 911 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 912 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 913 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - 
n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 914 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 915 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 916 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 917 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 
- n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 918 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 919 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 920 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 921 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 
128 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 922 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '32' - - '32' - input_idx: 923 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 924 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 925 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - 
num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 926 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 927 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 928 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 929 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 930 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 931 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 932 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - 
mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 933 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 934 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 935 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - 
executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 936 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 937 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - 
results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 938 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: 
simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 939 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 - 
values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 940 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 941 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 942 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 943 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - 
n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 944 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 945 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 946 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 947 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 
- n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 948 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 949 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 950 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 951 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 
128 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 952 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 953 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 954 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 955 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - 
n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 956 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 957 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 958 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 959 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 
- n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 960 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 961 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 962 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 963 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 
128 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 964 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '64' - - '32' - input_idx: 965 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 966 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 967 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: serial - threads: 4 - 
run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 968 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 969 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 970 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-64-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 971 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-64-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 972 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: 
/home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 973 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: 
simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 974 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 975 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 976 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: 
!Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 977 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 978 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - 
p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 979 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 980 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - 
traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 981 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: 
matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 982 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 983 - common: - repetitions: 2 - timeout: null - 
concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 984 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 985 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 986 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 987 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate 
- target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 988 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 
1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 989 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - 
dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 990 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 991 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 992 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 993 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-4 - values: - dtype: 
32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 994 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 995 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 996 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 997 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - 
m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 998 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 999 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 1000 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 1001 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - dtype: 32 
- m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 1002 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 1003 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 1004 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 1005 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-8 - values: - 
dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 1006 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 64 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '64' - - '128' - - '32' - input_idx: 1007 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-64-num_clusters-56-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-64-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1008 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1009 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: serial - threads: 4 - 
run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1010 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1011 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1012 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1013 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1014 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: 
/home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1015 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: 
simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1016 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1017 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1018 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: 
!Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1019 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1020 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 
- p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1021 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1022 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - 
traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1023 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: 
matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1024 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1025 - common: - repetitions: 2 - timeout: null - 
concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1026 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1027 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1028 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1029 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: 
Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1030 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - 
benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1031 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1032 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1033 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: 
Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1034 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - 
benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1035 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1036 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1037 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: 
Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1038 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - 
benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1039 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 
- values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1040 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1041 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1042 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1043 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 - values: - dtype: 32 
- m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1044 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1045 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1046 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1047 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-8 - values: - 
dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1048 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '32' - - '32' - input_idx: 1049 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-32/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1050 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1051 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: serial - threads: 4 - 
run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1052 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1053 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1054 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1055 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1056 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: 
/home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1057 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: 
simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1058 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1059 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1060 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: 
!Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1061 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1062 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 
- p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1063 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1064 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - 
traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1065 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: 
matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1066 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1067 - common: - repetitions: 2 - timeout: null - 
concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1068 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1069 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1070 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1071 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: 
Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1072 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - 
benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1073 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1074 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1075 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: 
Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1076 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - 
benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1077 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1078 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1079 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: 
Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1080 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - 
benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1081 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 
- values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1082 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1083 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1084 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1085 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 - values: - dtype: 32 
- m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1086 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1087 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1088 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1089 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-8 - values: - 
dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1090 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '64' - - '32' - input_idx: 1091 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-64/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1092 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1093 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: serial - threads: 
4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1094 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1095 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1096 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-128-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1097 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-128-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1098 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: 
/home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1099 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - 
name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1100 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1101 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1102 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1103 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1104 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 
- n: 128 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1105 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1106 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1107 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1108 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 
- n: 128 - p: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1109 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1110 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1111 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1112 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - 
dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1113 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1114 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1115 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1116 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 - 
values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1117 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1118 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1119 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-4 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1120 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-4 - 
values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1121 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1122 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1123 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1124 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - 
dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1125 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1126 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1127 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1128 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 - 
values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1129 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1130 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1131 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-8 - values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1132 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-8 - 
values: - dtype: 32 - m: 128 - n: 128 - p: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '128' - - '128' - - '32' - input_idx: 1133 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-128-num_clusters-56-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-128-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - values: - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1134 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - values: - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1135 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - values: - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1136 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - values: - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - 
cores_per_cluster: 8 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1137 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-serial-n-32-num_clusters-28-p-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-56-p-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-56-p-512-run_ahead-5-threads-4 - values: - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1138 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-serial-n-32-num_clusters-56-p-512-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-serial-n-32-num_clusters-56-p-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-serial-n-32-num_clusters-56-p-512-run_ahead-5-threads-4 - values: - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1139 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-serial-n-32-num_clusters-56-p-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - values: - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1140 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - values: - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1141 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - 
results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - values: - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1142 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: 
null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - values: - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1143 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-4 - values: - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1144 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-4 - values: - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1145 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: 
!Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 - values: - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1146 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 - values: - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1147 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 - values: - mode: deterministic - 
threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1148 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 - values: - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1149 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8/simulate - 
traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-8 - values: - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1150 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-deterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: 
matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-8 - values: - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1151 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-deterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1152 - common: - repetitions: 2 - timeout: null - 
concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1153 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1154 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1155 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1156 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: 
Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1157 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - 
benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1158 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1159 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1160 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: 
Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1161 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - 
benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-10-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1162 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-10-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1163 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1164 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: 
Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1165 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - 
benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1166 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1167 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1168 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: 
Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1169 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - 
benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1170 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1171 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1172 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: 
Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1173 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-28-p-512-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - 
benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-10-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1174 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-false-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-10-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - m: 512 - n: 32 - p: 512 - dtype: 32 - args: - - '512' - - '32' - - '512' - - '32' - input_idx: 1175 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-512-memory_only-true-mode-nondeterministic-n-32-num_clusters-56-p-512-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-512-n-32-p-512/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1176 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: 
!Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1177 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1178 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 
- cores_per_cluster: 8 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1179 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-serial-n-512-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-512-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-512-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1180 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-serial-n-512-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-512-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-512-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1181 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-serial-n-512-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1182 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1183 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: 
null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1184 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null 
- l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1185 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1186 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1187 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1188 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - 
uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1189 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - mode: 
deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1190 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1191 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-deterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1192 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-deterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1193 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-deterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - mode: 
nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1194 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1195 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1196 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1197 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - 
mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1198 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1199 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1200 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1201 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-4 - 
values: - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1202 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1203 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-10-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1204 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-10-threads-4 - values: - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1205 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 - values: 
- mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1206 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1207 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1208 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1209 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - 
mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1210 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1211 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1212 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1213 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-8 - 
values: - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1214 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1215 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-8-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-28-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-10-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1216 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-false-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - - name: simple_matrixmul - benchmark_idx: 1 - uid: 
simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/simple_matrixmul - rel_path: ./simple_matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/simple_matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-10-threads-8 - values: - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - m: 128 - n: 512 - p: 128 - dtype: 32 - args: - - '128' - - '512' - - '128' - - '32' - input_idx: 1217 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-cores_per_cluster-1-dtype-32-m-128-memory_only-true-mode-nondeterministic-n-512-num_clusters-56-p-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/simple_matrixmul/simple_matrixmul-dtype-32-m-128-n-512-p-128/accelsim-trace - parallel: null - l2_prefill: null - matrixmul: - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 32 - mode: serial - threads: 4 - run_ahead: 5 - 
num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '32' - - '32' - input_idx: 0 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '32' - - '32' - input_idx: 1 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: 
matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '32' - - '32' - input_idx: 2 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '32' - - '32' - input_idx: 3 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: 
Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-56-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-56-rows-32-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '32' - - '32' - input_idx: 4 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-56-rows-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-56-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - 
results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-56-rows-32-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 32 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '32' - - '32' - input_idx: 5 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-56-rows-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '32' - - '32' - input_idx: 6 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '32' - - '32' - input_idx: 7 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 - values: - dtype: 
32 - rows: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '32' - - '32' - input_idx: 8 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '32' - - '32' - input_idx: 9 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: 
null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-32-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '32' - - '32' - input_idx: 10 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-32-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 32 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '32' - - '32' - input_idx: 11 - common: - repetitions: 2 - 
timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '32' - - '32' - input_idx: 12 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 - path: 
/home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '32' - - '32' - input_idx: 13 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '32' - - '32' - input_idx: 14 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '32' - - '32' - input_idx: 15 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - 
results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-32-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '32' - - '32' - input_idx: 16 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-32-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 32 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '32' - - '32' - input_idx: 17 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-32-run_ahead-5-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '32' - - '32' - input_idx: 18 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '32' - - '32' - input_idx: 19 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '32' - - '32' - input_idx: 20 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '32' - - '32' - input_idx: 21 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-5-threads-4 
- values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '32' - - '32' - input_idx: 22 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '32' - - '32' - input_idx: 23 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '32' - - '32' - input_idx: 24 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 
- num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '32' - - '32' - input_idx: 25 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '32' - - '32' - input_idx: 26 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 
- uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '32' - - '32' - input_idx: 27 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '32' - - '32' - input_idx: 28 - common: - repetitions: 2 - timeout: null - 
concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '32' - - '32' - input_idx: 29 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 - path: 
/home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '32' - - '32' - input_idx: 30 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '32' - - '32' - input_idx: 31 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '32' - - '32' - input_idx: 32 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '32' - - '32' - input_idx: 33 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '32' - - '32' - input_idx: 34 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '32' - - '32' - input_idx: 35 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '32' - - '32' - input_idx: 36 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '32' - - '32' - input_idx: 37 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '32' - - '32' - input_idx: 38 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '32' - - '32' - input_idx: 39 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '32' - - '32' - input_idx: 40 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 32 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '32' - - '32' - input_idx: 41 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-32-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-32/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '32' - input_idx: 42 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '32' - input_idx: 43 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '32' - input_idx: 44 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 64 - mode: serial - threads: 
4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '32' - input_idx: 45 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-56-rows-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-56-rows-64-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '32' - input_idx: 46 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-56-rows-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: 
matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-56-rows-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-56-rows-64-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 64 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '32' - input_idx: 47 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-56-rows-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '32' - input_idx: 48 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: 
/home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '32' - input_idx: 49 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: 
matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '32' - input_idx: 50 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '32' - input_idx: 51 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-64-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '32' - input_idx: 52 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - 
results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-64-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 64 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '32' - input_idx: 53 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '32' - input_idx: 54 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '32' - input_idx: 55 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 - values: - dtype: 
32 - rows: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '32' - input_idx: 56 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '32' - input_idx: 57 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: 
null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-64-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '32' - input_idx: 58 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-64-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 64 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '32' - input_idx: 59 - common: - repetitions: 2 - 
timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '32' - input_idx: 60 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 - path: 
/home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '32' - input_idx: 61 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '32' - input_idx: 62 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '32' - input_idx: 63 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '32' - input_idx: 64 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '32' - input_idx: 65 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '32' - input_idx: 66 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '32' - input_idx: 67 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '32' - input_idx: 68 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '32' - input_idx: 69 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '32' - input_idx: 70 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '32' - input_idx: 71 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '32' - input_idx: 72 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '32' - input_idx: 73 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '32' - input_idx: 74 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '32' - input_idx: 75 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '32' - input_idx: 76 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '32' - input_idx: 77 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '32' - input_idx: 78 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '32' - input_idx: 79 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '64' - - '32' - input_idx: 80 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '64' - - '32' - input_idx: 81 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '64' - - '32' - input_idx: 82 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 64 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '64' - - '32' - input_idx: 83 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-64-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-64/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - input_idx: 84 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - input_idx: 85 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - input_idx: 86 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - input_idx: 87 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-56-rows-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-56-rows-128-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 128 - mode: serial 
- threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - input_idx: 88 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-56-rows-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-56-rows-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-56-rows-128-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 128 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - input_idx: 89 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-56-rows-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: 
matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - input_idx: 90 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - input_idx: 91 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: 
null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - input_idx: 92 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - 
rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - input_idx: 93 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-128-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - input_idx: 94 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-128-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 128 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - input_idx: 95 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul 
- results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - input_idx: 96 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - input_idx: 97 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - input_idx: 98 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 - 
values: - dtype: 32 - rows: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - input_idx: 99 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-128-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - input_idx: 100 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace 
- parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-128-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 128 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - input_idx: 101 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' 
- input_idx: 102 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - input_idx: 103 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: 
matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - input_idx: 104 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - input_idx: 105 - common: - repetitions: 2 - timeout: null - 
concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - input_idx: 106 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-5-threads-4 - path: 
/home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - input_idx: 107 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - input_idx: 108 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - input_idx: 109 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - input_idx: 110 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - input_idx: 111 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - input_idx: 112 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - input_idx: 113 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - input_idx: 114 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - input_idx: 115 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - input_idx: 116 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - input_idx: 117 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - input_idx: 118 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - input_idx: 119 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - input_idx: 120 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - input_idx: 121 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '128' - - '32' - input_idx: 122 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '128' - - '32' - input_idx: 123 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '128' - - '32' - input_idx: 124 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 128 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '128' - - '32' - input_idx: 125 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-128-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-128/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 256 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '256' - - '32' - input_idx: 126 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 256 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '256' - - '32' - input_idx: 127 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 256 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '256' - - '32' - input_idx: 128 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 256 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '256' - - '32' - input_idx: 129 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-256-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-56-rows-256-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-56-rows-256-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 256 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '256' - - '32' - input_idx: 130 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-56-rows-256-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-56-rows-256-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-56-rows-256-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 256 - mode: 
serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '256' - - '32' - input_idx: 131 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-56-rows-256-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 256 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '256' - - '32' - input_idx: 132 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: 
matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 256 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '256' - - '32' - input_idx: 133 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 256 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '256' - - '32' - input_idx: 134 - common: - repetitions: 2 - timeout: 
null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 256 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '256' - - '32' - input_idx: 135 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-256-run_ahead-5-threads-4 - path: 
/home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-256-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 256 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '256' - - '32' - input_idx: 136 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-256-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-256-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-256-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 256 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '256' - - '32' - input_idx: 137 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-256-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 256 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '256' - - '32' - input_idx: 138 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 256 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '256' - - '32' - input_idx: 139 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 256 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '256' - - '32' - input_idx: 140 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 256 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '256' - - '32' - input_idx: 141 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-256-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-256-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-256-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 256 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '256' - - '32' - input_idx: 142 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-256-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-256-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-256-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 256 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '256' - - '32' - input_idx: 143 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-256-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '256' - - '32' - input_idx: 144 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '256' - - '32' - input_idx: 145 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '256' - - '32' - input_idx: 146 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '256' - - '32' - input_idx: 147 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '256' - - '32' - input_idx: 148 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '256' - - '32' - input_idx: 149 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '256' - - '32' - input_idx: 150 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '256' - - '32' - input_idx: 151 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '256' - - '32' - input_idx: 152 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '256' - - '32' - input_idx: 153 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '256' - - '32' - input_idx: 154 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '256' - - '32' - input_idx: 155 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '256' - - '32' - input_idx: 156 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '256' - - '32' - input_idx: 157 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '256' - - '32' - input_idx: 158 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '256' - - '32' - input_idx: 159 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '256' - - '32' - input_idx: 160 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '256' - - '32' - input_idx: 161 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '256' - - '32' - input_idx: 162 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '256' - - '32' - input_idx: 163 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '256' - - '32' - input_idx: 164 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '256' - - '32' - input_idx: 165 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-256-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '256' - - '32' - input_idx: 166 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 256 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '256' - - '32' - input_idx: 167 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-256-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-256/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 512 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '512' - - '32' - input_idx: 168 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 512 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '512' - - '32' - input_idx: 169 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 512 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '512' - - '32' - input_idx: 170 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 512 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '512' - - '32' - input_idx: 171 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-serial-num_clusters-28-rows-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-56-rows-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-56-rows-512-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 512 - mode: serial 
- threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '512' - - '32' - input_idx: 172 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-serial-num_clusters-56-rows-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-56-rows-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-56-rows-512-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 512 - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '512' - - '32' - input_idx: 173 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-serial-num_clusters-56-rows-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: 
matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 512 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '512' - - '32' - input_idx: 174 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 512 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '512' - - '32' - input_idx: 175 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: 
null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 512 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '512' - - '32' - input_idx: 176 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - 
rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 512 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '512' - - '32' - input_idx: 177 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-512-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 512 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '512' - - '32' - input_idx: 178 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-512-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 512 - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '512' - - '32' - input_idx: 179 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 512 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '512' - - '32' - input_idx: 180 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 512 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '512' - - '32' - input_idx: 181 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 512 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '512' - - '32' - input_idx: 182 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 512 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '512' - - '32' - input_idx: 183 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-deterministic-num_clusters-28-rows-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-512-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 512 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '512' - - '32' - input_idx: 184 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-deterministic-num_clusters-56-rows-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-512-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 512 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '512' - - '32' - input_idx: 185 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-deterministic-num_clusters-56-rows-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '512' - - '32' - input_idx: 186 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '512' - - '32' - input_idx: 187 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '512' - - '32' - input_idx: 188 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '512' - - '32' - input_idx: 189 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '512' - - '32' - input_idx: 190 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-5-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-5-threads-4 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '512' - - '32' - input_idx: 191 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-5-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '512' - - '32' - input_idx: 192 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '512' - - '32' - input_idx: 193 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '512' - - '32' - input_idx: 194 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '512' - - '32' - input_idx: 195 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '512' - - '32' - input_idx: 196 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-10-threads-4 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-10-threads-4 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '512' - - '32' - input_idx: 197 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-10-threads-4/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '512' - - '32' - input_idx: 198 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '512' - - '32' - input_idx: 199 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '512' - - '32' - input_idx: 200 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '512' - - '32' - input_idx: 201 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '512' - - '32' - input_idx: 202 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-5-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-5-threads-8 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '512' - - '32' - input_idx: 203 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-5-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - args: - - '512' - - '32' - input_idx: 204 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - args: - - '512' - - '32' - input_idx: 205 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - args: - - '512' - - '32' - input_idx: 206 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: 
/home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - args: - - '512' - - '32' - input_idx: 207 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-8-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - args: - - '512' - - '32' - input_idx: 208 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - - name: matrixmul - benchmark_idx: 2 - uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-10-threads-8 - path: /home/roman/dev/box/test-apps/matrixmul - rel_path: ./matrixmul - executable: matrixmul - executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul - results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-10-threads-8 - values: - dtype: 32 - rows: 512 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - args: - - '512' - - '32' - input_idx: 209 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-true-mode-nondeterministic-num_clusters-56-rows-512-run_ahead-10-threads-8/simulate - traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace - accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace - parallel: null - l2_prefill: null - transpose: - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: 
/home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 0 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 1 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: 
!Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 2 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: 
/home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 3 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 4 - common: - repetitions: 2 - timeout: null - 
concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 5 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 6 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - 
cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 7 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 8 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 9 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 10 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 11 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 128 - variant: naive - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 12 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 128 - variant: naive - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 13 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 128 - variant: naive - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - 
cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 14 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 128 - variant: naive - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 15 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 128 - variant: naive - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 16 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 128 - variant: naive - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 17 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 18 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - 
stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 19 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 20 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 5 - 
num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 21 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 22 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 23 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - 
results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 24 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 25 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 26 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 27 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 10 
- num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 28 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 29 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-naive/simulate - traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 30 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - 
results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 31 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 32 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 33 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 34 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 5 - 
num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 35 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 36 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive/simulate - traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 37 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - 
results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 38 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 39 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 40 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-naive - values: - dim: 128 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 41 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - 
cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 42 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 43 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 44 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 45 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 46 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 47 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 48 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: deterministic - threads: 4 - 
run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 49 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 50 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 51 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: 
/home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 52 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 53 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - 
results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 54 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - 
benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 55 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: 
deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 56 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 57 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - 
traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 58 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: 
/home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 59 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 60 - common: - repetitions: 2 - timeout: null - concurrency: 1 - 
enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 61 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null 
- - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 62 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - 
dim: 128 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 63 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 64 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 65 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 66 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: 
nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 67 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 68 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 69 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 70 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: 
nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 71 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 72 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 73 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 74 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: 
nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 75 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 76 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 77 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 78 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: 
nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 79 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 80 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 81 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-128-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 82 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-coalesced - values: - dim: 128 - variant: coalesced - mode: 
nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=128 - - -dimY=128 - - -repeat=0 - input_idx: 83 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 84 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 85 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 86 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 87 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 88 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - path: 
/home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 89 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 90 - common: - repetitions: 2 - 
timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 91 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - 
benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 92 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: deterministic - threads: 4 - run_ahead: 5 - 
num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 93 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 94 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 95 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 256 - variant: naive - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 96 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 256 - variant: naive - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 97 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 256 - variant: naive - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 98 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 256 - variant: naive - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 99 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 256 - variant: naive - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - 
cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 100 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 256 - variant: naive - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 101 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 102 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 103 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 104 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: 
!Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 105 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 106 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 5 - 
num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 107 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 108 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive/simulate - traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 109 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose 
- results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 110 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 111 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 112 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 113 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 5 - 
num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 114 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 115 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 116 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - 
results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 117 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 118 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 119 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 120 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 
10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 121 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 122 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive/simulate - traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 123 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose 
- results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 124 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-naive - values: - dim: 256 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 125 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 126 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 127 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - 
cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 128 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 129 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace 
- accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 130 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 131 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 132 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: 
!Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 133 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 134 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: deterministic - threads: 4 
- run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 135 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 136 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 137 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: 
/home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 138 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 139 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null 
- results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 140 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - 
benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 141 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: 
deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 142 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 143 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - 
traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 144 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: 
/home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 145 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 146 - common: - repetitions: 2 - timeout: null - concurrency: 
1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 147 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: 
null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 148 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced - 
values: - dim: 256 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 149 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 150 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 151 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 152 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: 
nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 153 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 154 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 155 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 156 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: 
nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 157 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 158 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 159 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 160 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: 
nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 161 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 162 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 163 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 164 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: 
nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 165 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-256-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 166 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-coalesced - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-coalesced - values: - dim: 256 - variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=coalesced - - -dimX=256 - - -dimY=256 - - -repeat=0 - input_idx: 167 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 512 - variant: naive - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=512 - - -dimY=512 - - -repeat=0 - input_idx: 168 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 512 - variant: naive - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 
- args: - - -variant=naive - - -dimX=512 - - -dimY=512 - - -repeat=0 - input_idx: 169 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 512 - variant: naive - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=512 - - -dimY=512 - - -repeat=0 - input_idx: 170 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 512 - variant: naive - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=512 - - -dimY=512 - - -repeat=0 - input_idx: 171 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 512 - variant: naive - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=512 - - -dimY=512 - - -repeat=0 - input_idx: 172 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 512 - variant: naive - mode: serial - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=512 - - -dimY=512 - - -repeat=0 - input_idx: 173 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 512 - variant: naive - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=512 - - -dimY=512 - - -repeat=0 - input_idx: 174 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 512 - variant: naive - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 1 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=512 - - -dimY=512 - - -repeat=0 - input_idx: 175 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 512 - variant: naive - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - 
cores_per_cluster: 8 - memory_only: false - repeat: 1 - args: - - -variant=naive - - -dimX=512 - - -dimY=512 - - -repeat=0 - input_idx: 176 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 512 - variant: naive - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true - repeat: 1 - args: - - -variant=naive - - -dimX=512 - - -dimY=512 - - -repeat=0 - input_idx: 177 - common: - repetitions: 2 - timeout: null - concurrency: 1 - enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace - parallel: null - l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - values: - dim: 512 - variant: naive - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + rows: 512 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false - repeat: 1 args: - - -variant=naive - - -dimX=512 - - -dimY=512 - - -repeat=0 - input_idx: 178 + - '512' + - '32' + input_idx: 108 common: repetitions: 2 timeout: null @@ -81895,35 +41645,32 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-4-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-28-rows-512-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null - - name: transpose - benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive - path: /home/roman/dev/box/test-apps/transpose - rel_path: ./transpose - executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive + - name: matrixmul + benchmark_idx: 2 + uid: matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-512-run_ahead-10-threads-8 + path: /home/roman/dev/box/test-apps/matrixmul + rel_path: ./matrixmul + executable: matrixmul + executable_path: /home/roman/dev/box/test-apps/matrixmul/matrixmul + results_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-512-run_ahead-10-threads-8 values: - dim: 512 - variant: naive - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 + dtype: 32 + rows: 512 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true - repeat: 1 + memory_only: false args: - - -variant=naive - - -dimX=512 - - -dimY=512 - - -repeat=0 - input_idx: 179 + - '512' + - '32' + input_idx: 109 common: repetitions: 2 timeout: null @@ -81932,24 +41679,25 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/matrixmul/matrixmul-cores_per_cluster-1-dtype-32-memory_only-false-mode-nondeterministic-num_clusters-112-rows-512-run_ahead-10-threads-8/simulate + traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/trace + accelsim_traces_dir: /home/roman/dev/box/results/matrixmul/matrixmul-dtype-32-rows-512/accelsim-trace parallel: null l2_prefill: null + transpose: - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - dim: 512 + dim: 128 variant: naive - mode: deterministic - threads: 8 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 @@ -81957,10 +41705,10 @@ benchmarks: repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 180 + input_idx: 0 common: repetitions: 2 timeout: null @@ -81969,24 +41717,24 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive + uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - dim: 512 + dim: 128 variant: naive - mode: deterministic - threads: 8 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 @@ -81994,10 +41742,10 @@ benchmarks: repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 181 + 
input_idx: 1 common: repetitions: 2 timeout: null @@ -82006,35 +41754,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive + uid: transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - dim: 512 + dim: 128 variant: naive - mode: deterministic - threads: 8 + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - 
cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 182 + input_idx: 2 common: repetitions: 2 timeout: null @@ -82043,35 +41791,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive + results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive values: - dim: 512 + dim: 128 variant: naive - mode: deterministic - threads: 8 + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 183 + input_idx: 3 common: repetitions: 2 timeout: null @@ -82080,35 +41828,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: 
/home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - dim: 512 + dim: 128 variant: naive mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 184 + input_idx: 4 common: repetitions: 2 timeout: null @@ -82117,35 +41865,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive + uid: 
transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - dim: 512 + dim: 128 variant: naive mode: deterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 185 + input_idx: 5 common: repetitions: 2 timeout: null @@ -82154,35 +41902,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive values: - dim: 512 + dim: 128 variant: naive - mode: nondeterministic + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 186 + input_idx: 6 common: repetitions: 2 timeout: null @@ -82191,35 +41939,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive values: - dim: 512 + dim: 128 variant: naive - mode: nondeterministic - threads: 4 + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 187 + input_idx: 7 common: repetitions: 2 timeout: null @@ -82228,35 +41976,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - 
traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive + uid: transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive values: - dim: 512 + dim: 128 variant: naive - mode: nondeterministic - threads: 4 + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 188 + input_idx: 8 common: repetitions: 2 timeout: null @@ -82265,35 +42013,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate 
target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-naive values: - dim: 512 + dim: 128 variant: naive - mode: nondeterministic - threads: 4 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: 
false repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 189 + input_idx: 9 common: repetitions: 2 timeout: null @@ -82302,35 +42050,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive + results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - dim: 512 + dim: 128 variant: naive mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 190 + input_idx: 10 common: repetitions: 2 timeout: null @@ -82339,35 +42087,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive + uid: transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - dim: 512 + dim: 128 variant: naive mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 191 + input_idx: 11 common: repetitions: 2 timeout: null @@ -82376,35 +42124,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive + uid: 
transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive values: - dim: 512 + dim: 128 variant: naive mode: nondeterministic threads: 4 - run_ahead: 10 - num_clusters: 28 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 192 + input_idx: 12 common: repetitions: 2 timeout: null @@ -82413,35 +42161,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace 
parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive values: - dim: 512 + dim: 128 variant: naive mode: nondeterministic threads: 4 run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 193 + input_idx: 13 common: repetitions: 2 timeout: null @@ -82450,35 +42198,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive/simulate + traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive + uid: transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive values: - dim: 512 + dim: 128 variant: naive mode: nondeterministic threads: 4 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 194 + input_idx: 14 common: repetitions: 2 timeout: null @@ -82487,35 +42235,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + 
stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-4-variant-naive values: - dim: 512 + dim: 128 variant: naive mode: nondeterministic threads: 4 run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 195 + input_idx: 15 common: repetitions: 2 timeout: null @@ -82524,35 +42272,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-naive + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive values: - dim: 512 + dim: 128 variant: naive mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 + threads: 8 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 memory_only: false repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 
+ - -dimY=128 - -repeat=0 - input_idx: 196 + input_idx: 16 common: repetitions: 2 timeout: null @@ -82561,35 +42309,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-naive + uid: transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive values: - dim: 512 + dim: 128 variant: naive mode: 
nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + threads: 8 + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 197 + input_idx: 17 common: repetitions: 2 timeout: null @@ -82598,35 +42346,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-naive values: - dim: 512 + dim: 128 variant: naive mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 198 + input_idx: 18 common: repetitions: 2 timeout: null @@ -82635,35 +42383,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive + uid: 
transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive values: - dim: 512 + dim: 128 variant: naive mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 199 + input_idx: 19 common: repetitions: 2 timeout: null @@ -82672,35 +42420,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace 
parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive + uid: transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive values: - dim: 512 + dim: 128 variant: naive mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 200 + input_idx: 20 common: repetitions: 2 timeout: null @@ -82709,35 +42457,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-8-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-8-variant-naive values: - dim: 512 + dim: 128 variant: naive mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false repeat: 1 args: - -variant=naive - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 201 + input_idx: 21 common: repetitions: 2 timeout: null @@ -82746,35 +42494,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - dim: 512 - variant: naive - mode: nondeterministic - threads: 8 + dim: 128 + variant: coalesced + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false repeat: 1 args: - - -variant=naive - - -dimX=512 - - -dimY=512 + - 
-variant=coalesced + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 202 + input_idx: 22 common: repetitions: 2 timeout: null @@ -82783,35 +42531,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive + uid: transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - dim: 512 - variant: 
naive - mode: nondeterministic - threads: 8 + dim: 128 + variant: coalesced + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: true repeat: 1 args: - - -variant=naive - - -dimX=512 - - -dimY=512 + - -variant=coalesced + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 203 + input_idx: 23 common: repetitions: 2 timeout: null @@ -82820,35 +42568,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive + uid: transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose - executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive - values: - dim: 512 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 10 + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + values: + dim: 128 + variant: coalesced + mode: serial + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false repeat: 1 args: - - -variant=naive - - -dimX=512 - - -dimY=512 + - -variant=coalesced + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 204 + input_idx: 24 common: repetitions: 2 timeout: null @@ -82857,35 +42605,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - dim: 512 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 + dim: 128 + variant: coalesced + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false repeat: 1 args: - - -variant=naive - - -dimX=512 - - -dimY=512 + - -variant=coalesced + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 205 + input_idx: 25 common: repetitions: 2 timeout: null @@ -82894,35 +42642,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-serial-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - dim: 512 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 10 + dim: 128 + variant: coalesced + mode: deterministic + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false repeat: 1 args: - - -variant=naive - - -dimX=512 - - -dimY=512 + - -variant=coalesced + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 206 + input_idx: 26 common: repetitions: 2 timeout: null @@ -82931,35 +42679,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive + uid: transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - dim: 512 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 10 + dim: 128 + variant: coalesced + mode: deterministic + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: 
false repeat: 1 args: - - -variant=naive - - -dimX=512 - - -dimY=512 + - -variant=coalesced + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 207 + input_idx: 27 common: repetitions: 2 timeout: null @@ -82968,35 +42716,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-naive + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-naive + results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - dim: 512 - variant: naive - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 + dim: 128 + variant: coalesced + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false repeat: 1 args: - - -variant=naive - - -dimX=512 - - -dimY=512 + - -variant=coalesced + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 208 + input_idx: 28 common: repetitions: 2 timeout: null @@ -83005,35 +42753,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-naive + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced path: /home/roman/dev/box/test-apps/transpose 
rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-naive + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced values: - dim: 512 - variant: naive - mode: nondeterministic + dim: 128 + variant: coalesced + mode: deterministic threads: 8 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false repeat: 1 args: - - -variant=naive - - -dimX=512 - - -dimY=512 + - -variant=coalesced + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 209 + input_idx: 29 common: repetitions: 2 timeout: null @@ -83042,35 +42790,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-naive/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced values: - dim: 512 + dim: 128 variant: coalesced - mode: serial - threads: 4 + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false repeat: 1 args: - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 210 + input_idx: 30 common: repetitions: 2 timeout: null @@ -83079,35 +42827,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate + traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-coalesced values: - dim: 512 + dim: 128 variant: coalesced - mode: serial - threads: 4 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false repeat: 1 args: - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 211 + input_idx: 31 common: repetitions: 2 timeout: null @@ -83116,35 +42864,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - dim: 512 + dim: 128 variant: coalesced - mode: serial + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false repeat: 1 args: - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 212 + input_idx: 32 common: repetitions: 2 timeout: null @@ -83153,35 +42901,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - dim: 512 + dim: 128 variant: coalesced - mode: serial + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false repeat: 1 args: - -variant=coalesced - - -dimX=512 - - 
-dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 213 + input_idx: 33 common: repetitions: 2 timeout: null @@ -83190,35 +42938,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - dim: 
512 + dim: 128 variant: coalesced - mode: serial + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false repeat: 1 args: - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 214 + input_idx: 34 common: repetitions: 2 timeout: null @@ -83227,35 +42975,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced values: - dim: 512 + dim: 128 variant: coalesced - mode: serial + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false repeat: 1 args: - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 215 + input_idx: 35 common: repetitions: 2 timeout: null @@ -83264,35 +43012,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-serial-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + uid: 
transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced values: - dim: 512 + dim: 128 variant: coalesced - mode: deterministic + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false repeat: 1 args: - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 216 + input_idx: 36 common: repetitions: 2 timeout: null @@ -83301,35 +43049,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-4-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-4-variant-coalesced values: - dim: 512 + dim: 128 variant: coalesced - mode: deterministic + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 28 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false repeat: 1 args: - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 217 + input_idx: 37 common: repetitions: 2 timeout: null @@ -83338,35 +43086,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced values: - dim: 512 + dim: 128 variant: coalesced - mode: deterministic - threads: 4 + mode: nondeterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false repeat: 1 args: - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 218 + input_idx: 38 common: repetitions: 2 timeout: null @@ -83375,35 +43123,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced values: - dim: 512 + dim: 128 variant: coalesced - mode: deterministic - threads: 4 + mode: nondeterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false repeat: 1 
args: - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 219 + input_idx: 39 common: repetitions: 2 timeout: null @@ -83412,35 +43160,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced + results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-coalesced values: - dim: 512 + dim: 128 variant: coalesced - mode: deterministic - threads: 4 + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false repeat: 1 args: - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 220 + input_idx: 40 common: repetitions: 2 timeout: null @@ -83449,35 +43197,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose 
executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced values: - dim: 512 + dim: 128 variant: coalesced - mode: deterministic - threads: 4 - run_ahead: 5 - num_clusters: 56 + mode: nondeterministic + threads: 8 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false repeat: 1 args: - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 221 + input_idx: 41 common: repetitions: 2 timeout: null @@ -83486,35 +43234,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + uid: transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced values: - dim: 512 + dim: 128 variant: coalesced - mode: deterministic + mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false repeat: 1 args: - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 222 + input_idx: 42 common: repetitions: 2 timeout: null @@ -83523,35 +43271,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-128-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate + traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-8-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-8-variant-coalesced values: - dim: 512 + dim: 128 variant: coalesced - mode: deterministic + mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 28 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false repeat: 1 args: - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -dimX=128 + - -dimY=128 - -repeat=0 - input_idx: 223 + input_idx: 43 common: repetitions: 2 timeout: null @@ -83560,35 +43308,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-128-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-128-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - dim: 512 - variant: coalesced - mode: deterministic - threads: 8 + dim: 256 + variant: naive + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false repeat: 1 args: - - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 224 + input_idx: 44 common: repetitions: 2 timeout: null @@ -83597,35 +43345,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - dim: 512 - variant: coalesced - mode: deterministic - threads: 8 + dim: 256 + variant: naive + mode: serial + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: true repeat: 1 args: - - -variant=coalesced - - -dimX=512 - - -dimY=512 + - 
-variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 225 + input_idx: 45 common: repetitions: 2 timeout: null @@ -83634,35 +43382,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced + uid: transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - dim: 512 - variant: coalesced - 
mode: deterministic - threads: 8 + dim: 256 + variant: naive + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false repeat: 1 args: - - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 226 + input_idx: 46 common: repetitions: 2 timeout: null @@ -83671,35 +43419,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive values: - dim: 512 - variant: coalesced - mode: deterministic - threads: 8 + dim: 256 + variant: naive + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false repeat: 1 args: - - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 227 + input_idx: 47 common: repetitions: 2 timeout: null @@ -83708,23 +43456,23 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-deterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + uid: 
transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - dim: 512 - variant: coalesced - mode: nondeterministic + dim: 256 + variant: naive + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 @@ -83732,11 +43480,11 @@ benchmarks: memory_only: false repeat: 1 args: - - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 228 + input_idx: 48 common: repetitions: 2 timeout: null @@ -83745,35 +43493,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - dim: 512 - variant: coalesced - mode: nondeterministic + dim: 256 + variant: naive + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false repeat: 1 args: - - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 229 + input_idx: 49 common: repetitions: 2 timeout: null @@ -83782,35 +43530,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive values: - dim: 512 - variant: coalesced - mode: nondeterministic + dim: 256 + variant: naive + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false repeat: 1 args: - - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 230 + input_idx: 50 common: repetitions: 2 timeout: null @@ -83819,35 +43567,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive values: - dim: 512 - variant: coalesced - mode: nondeterministic - threads: 4 + dim: 256 + variant: naive + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false repeat: 1 
args: - - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 231 + input_idx: 51 common: repetitions: 2 timeout: null @@ -83856,35 +43604,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced + results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive values: - dim: 512 - variant: coalesced - mode: nondeterministic - threads: 4 + dim: 256 + variant: naive + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false repeat: 1 args: - - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 232 + input_idx: 52 common: repetitions: 2 timeout: null @@ -83893,35 +43641,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-naive path: /home/roman/dev/box/test-apps/transpose 
rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-naive values: - dim: 512 - variant: coalesced - mode: nondeterministic - threads: 4 + dim: 256 + variant: naive + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false repeat: 1 args: - - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 233 + input_idx: 53 common: repetitions: 2 timeout: null @@ -83930,35 +43678,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - dim: 512 - variant: coalesced + dim: 256 + variant: naive mode: nondeterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false repeat: 1 args: - - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 234 + input_idx: 54 common: repetitions: 2 timeout: null @@ -83967,35 +43715,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - dim: 512 - variant: coalesced + dim: 256 + variant: naive mode: nondeterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false repeat: 1 args: - - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 235 + input_idx: 55 common: repetitions: 2 timeout: null @@ -84004,35 +43752,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive values: - dim: 512 - variant: coalesced + dim: 256 + variant: naive mode: nondeterministic threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false repeat: 1 args: - - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 236 + input_idx: 56 common: repetitions: 2 timeout: null @@ -84041,35 +43789,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive values: - dim: 512 - variant: coalesced + dim: 256 + variant: naive mode: nondeterministic threads: 4 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false repeat: 1 args: - - -variant=coalesced 
- - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 237 + input_idx: 57 common: repetitions: 2 timeout: null @@ -84078,35 +43826,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-coalesced + results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive values: - dim: 512 - variant: coalesced + dim: 256 + variant: naive mode: nondeterministic threads: 4 run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false repeat: 1 args: - - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 238 + input_idx: 58 common: repetitions: 2 timeout: null @@ -84115,35 +43863,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-4-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose 
executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-4-variant-naive values: - dim: 512 - variant: coalesced + dim: 256 + variant: naive mode: nondeterministic threads: 4 run_ahead: 10 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false repeat: 1 args: - - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 239 + input_idx: 59 common: repetitions: 2 timeout: null @@ -84152,22 +43900,22 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive values: - dim: 512 - variant: coalesced + dim: 256 + variant: naive mode: nondeterministic threads: 8 run_ahead: 5 @@ -84176,11 +43924,11 @@ benchmarks: memory_only: false repeat: 1 args: - - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 240 + input_idx: 60 common: repetitions: 2 timeout: null @@ -84189,35 +43937,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate + traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + uid: transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive values: - dim: 512 - variant: coalesced + dim: 256 + variant: naive mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false repeat: 1 args: - - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 241 + input_idx: 61 common: repetitions: 2 timeout: null @@ -84226,35 +43974,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-naive values: - dim: 512 - variant: coalesced + dim: 256 + variant: naive mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false repeat: 1 args: - - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 242 + input_idx: 62 common: repetitions: 2 timeout: null @@ -84263,35 +44011,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive values: - dim: 512 - variant: coalesced + dim: 256 + variant: naive mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false repeat: 1 args: - - 
-variant=coalesced - - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 243 + input_idx: 63 common: repetitions: 2 timeout: null @@ -84300,35 +44048,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced + uid: transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced + results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive values: - dim: 512 - variant: coalesced + dim: 256 + variant: naive mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + run_ahead: 10 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false repeat: 1 args: - - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 244 + input_idx: 64 common: repetitions: 2 timeout: null @@ -84337,35 +44085,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-8-variant-naive path: /home/roman/dev/box/test-apps/transpose rel_path: 
./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-8-variant-naive values: - dim: 512 - variant: coalesced + dim: 256 + variant: naive mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false repeat: 1 args: - - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -variant=naive + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 245 + input_idx: 65 common: repetitions: 2 timeout: null @@ -84374,35 +44122,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: 
transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - dim: 512 + dim: 256 variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 10 + mode: serial + threads: 4 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false repeat: 1 args: - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 246 + input_idx: 66 common: repetitions: 2 timeout: null @@ -84411,35 +44159,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - dim: 512 + dim: 256 variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 10 + mode: serial + threads: 4 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: true repeat: 1 args: - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 247 + input_idx: 67 common: repetitions: 2 timeout: null @@ -84448,35 +44196,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced + uid: transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - dim: 512 + dim: 256 variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 10 + mode: serial + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false repeat: 1 args: - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 248 + input_idx: 68 common: repetitions: 2 timeout: null @@ -84485,35 +44233,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - dim: 512 + dim: 256 variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: 
false repeat: 1 args: - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 249 + input_idx: 69 common: repetitions: 2 timeout: null @@ -84522,35 +44270,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-8-dim-512-memory_only-true-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-serial-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-coalesced + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-coalesced + results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - dim: 512 + dim: 256 variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 memory_only: false repeat: 1 args: - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 250 + input_idx: 70 common: repetitions: 2 timeout: null @@ -84559,35 +44307,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - name: transpose benchmark_idx: 3 - uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-coalesced + uid: transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced path: /home/roman/dev/box/test-apps/transpose rel_path: ./transpose 
executable: transpose executable_path: /home/roman/dev/box/test-apps/transpose/transpose - results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-coalesced + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - dim: 512 + dim: 256 variant: coalesced - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + mode: deterministic + threads: 4 + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false repeat: 1 args: - -variant=coalesced - - -dimX=512 - - -dimY=512 + - -dimX=256 + - -dimY=256 - -repeat=0 - input_idx: 251 + input_idx: 71 common: repetitions: 2 timeout: null @@ -84596,34 +44344,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-nondeterministic-num_clusters-56-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate - traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace - accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace - parallel: null - l2_prefill: null - babelstream: - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: 
/home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4 + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace + parallel: null + l2_prefill: null + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - size: 1024 - mode: serial + dim: 256 + variant: coalesced + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 0 + - -variant=coalesced + - -dimX=256 + - -dimY=256 + - -repeat=0 + input_idx: 72 common: repetitions: 2 timeout: null @@ -84632,33 +44381,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced values: - size: 1024 - mode: serial - threads: 4 + dim: 256 + variant: coalesced + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 1 + - -variant=coalesced + - -dimX=256 + - -dimY=256 + - -repeat=0 + input_idx: 73 common: repetitions: 2 timeout: null @@ -84667,33 
+44418,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced values: - size: 1024 - mode: serial - 
threads: 4 + dim: 256 + variant: coalesced + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 2 + - -variant=coalesced + - -dimX=256 + - -dimY=256 + - -repeat=0 + input_idx: 74 common: repetitions: 2 timeout: null @@ -84702,33 +44455,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-coalesced + path: 
/home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-coalesced values: - size: 1024 - mode: serial - threads: 4 + dim: 256 + variant: coalesced + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 3 + - -variant=coalesced + - -dimX=256 + - -dimY=256 + - -repeat=0 + input_idx: 75 common: repetitions: 2 timeout: null @@ -84737,33 +44492,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - 
executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - size: 1024 - mode: serial + dim: 256 + variant: coalesced + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 4 + - -variant=coalesced + - -dimX=256 + - -dimY=256 + - -repeat=0 + input_idx: 76 common: repetitions: 2 timeout: null @@ -84772,33 +44529,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - size: 1024 - mode: serial + dim: 256 + variant: coalesced + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 5 + - -variant=coalesced + - -dimX=256 + - -dimY=256 + - -repeat=0 + input_idx: 77 common: repetitions: 2 timeout: null @@ -84807,33 +44566,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - size: 1024 - mode: deterministic + dim: 256 + variant: coalesced + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 6 + - -variant=coalesced + - -dimX=256 + - -dimY=256 + - -repeat=0 + input_idx: 78 common: 
repetitions: 2 timeout: null @@ -84842,33 +44603,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced values: - size: 1024 - mode: deterministic + dim: 256 + variant: coalesced + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 7 + - -variant=coalesced + - -dimX=256 + - -dimY=256 + - -repeat=0 + input_idx: 79 common: repetitions: 2 timeout: null @@ -84877,33 +44640,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: 
/home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced values: - size: 1024 - mode: deterministic + dim: 256 + variant: coalesced + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 8 + - -variant=coalesced + - -dimX=256 + - -dimY=256 + - -repeat=0 + input_idx: 80 common: repetitions: 2 timeout: null @@ -84912,33 +44677,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-4-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-4-variant-coalesced values: - size: 1024 - mode: deterministic + dim: 256 + variant: coalesced + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 9 + - -variant=coalesced + - -dimX=256 + - -dimY=256 + - -repeat=0 + input_idx: 81 common: repetitions: 2 timeout: null @@ -84947,33 +44714,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced values: - size: 1024 - mode: deterministic - threads: 4 + dim: 256 + variant: coalesced + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 10 + - 
-variant=coalesced + - -dimX=256 + - -dimY=256 + - -repeat=0 + input_idx: 82 common: repetitions: 2 timeout: null @@ -84982,33 +44751,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced values: - size: 1024 - mode: deterministic - threads: 4 + dim: 256 + variant: coalesced + mode: nondeterministic + threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 11 + - -variant=coalesced + - -dimX=256 + - -dimY=256 + - -repeat=0 + input_idx: 83 common: repetitions: 2 timeout: null @@ -85017,33 +44788,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: 
/home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-coalesced values: - size: 1024 - mode: deterministic + dim: 256 + variant: coalesced + mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 12 + - -variant=coalesced + - -dimX=256 + - -dimY=256 + - -repeat=0 + input_idx: 84 common: repetitions: 2 timeout: null @@ -85052,33 +44825,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced values: - size: 1024 - mode: deterministic + dim: 256 + variant: coalesced + mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 13 + - -variant=coalesced + - -dimX=256 + - -dimY=256 + - -repeat=0 + input_idx: 85 common: repetitions: 2 timeout: null @@ -85087,33 +44862,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced values: - size: 1024 - mode: deterministic + dim: 256 + variant: coalesced + mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 14 + - -variant=coalesced + - -dimX=256 + - -dimY=256 + - -repeat=0 + 
input_idx: 86 common: repetitions: 2 timeout: null @@ -85122,33 +44899,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-256-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-8-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-8-variant-coalesced values: - size: 1024 - mode: deterministic + dim: 256 + variant: coalesced + mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 15 + - -variant=coalesced + - -dimX=256 + - -dimY=256 + - -repeat=0 + input_idx: 87 common: repetitions: 2 timeout: null @@ -85157,33 +44936,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-256-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-256-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-size-1024-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: 
/home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-size-1024-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - size: 1024 - mode: deterministic - threads: 8 + dim: 512 + variant: naive + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 16 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 88 common: repetitions: 2 timeout: null @@ -85192,33 +44973,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-size-1024-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null 
- - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-size-1024-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-size-1024-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - size: 1024 - mode: deterministic - threads: 8 + dim: 512 + variant: naive + mode: serial + threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: true + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 17 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 89 common: repetitions: 2 timeout: null @@ -85227,33 +45010,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-size-1024-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - size: 1024 - mode: nondeterministic + dim: 512 + variant: naive + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 18 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 90 common: repetitions: 2 timeout: null @@ -85262,33 +45047,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive values: - size: 1024 - mode: nondeterministic + dim: 512 + variant: naive + mode: serial threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 
- memory_only: true + memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 19 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 91 common: repetitions: 2 timeout: null @@ -85297,33 +45084,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: 
/home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - size: 1024 - mode: nondeterministic + dim: 512 + variant: naive + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 20 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 92 common: repetitions: 2 timeout: null @@ -85332,33 +45121,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: 
/home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - size: 1024 - mode: nondeterministic + dim: 512 + variant: naive + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 21 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 93 common: repetitions: 2 timeout: null @@ -85367,33 +45158,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive values: - size: 1024 - mode: nondeterministic + dim: 512 + variant: naive + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 22 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 94 common: repetitions: 2 timeout: null @@ -85402,33 +45195,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive values: - size: 1024 - mode: nondeterministic - threads: 4 + dim: 512 + variant: naive + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 23 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 95 common: repetitions: 2 timeout: 
null @@ -85437,33 +45232,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive values: - 
size: 1024 - mode: nondeterministic - threads: 4 - run_ahead: 10 + dim: 512 + variant: naive + mode: deterministic + threads: 8 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 24 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 96 common: repetitions: 2 timeout: null @@ -85472,33 +45269,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: 
transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-naive values: - size: 1024 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 + dim: 512 + variant: naive + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 25 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 97 common: repetitions: 2 timeout: null @@ -85507,33 +45306,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: 
babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - size: 1024 + dim: 512 + variant: naive mode: nondeterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 26 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 98 common: repetitions: 2 timeout: null @@ -85542,33 +45343,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive values: - size: 1024 + dim: 512 + variant: naive mode: nondeterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 27 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 99 common: repetitions: 2 timeout: null @@ -85577,33 +45380,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: 
Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive values: - size: 1024 + dim: 512 + variant: naive mode: nondeterministic threads: 4 - 
run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 28 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 100 common: repetitions: 2 timeout: null @@ -85612,33 +45417,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-size-1024-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-size-1024-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive + path: /home/roman/dev/box/test-apps/transpose + 
rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive values: - size: 1024 + dim: 512 + variant: naive mode: nondeterministic threads: 4 run_ahead: 10 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 29 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 101 common: repetitions: 2 timeout: null @@ -85647,33 +45454,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-size-1024-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: 
/home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive values: - size: 1024 + dim: 512 + variant: naive mode: nondeterministic - threads: 8 - run_ahead: 5 + threads: 4 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 30 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 102 common: repetitions: 2 timeout: null @@ -85682,33 +45491,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-4-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-4-variant-naive values: - size: 1024 + dim: 512 + variant: naive mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 + threads: 4 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 31 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 103 common: repetitions: 2 timeout: null @@ -85717,33 +45528,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace 
- accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-4-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive values: - size: 1024 + dim: 512 + variant: naive mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 32 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 104 common: repetitions: 2 timeout: null @@ -85752,33 
+45565,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive values: - size: 1024 + 
dim: 512 + variant: naive mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 33 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 105 common: repetitions: 2 timeout: null @@ -85787,33 +45602,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-size-1024-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-size-1024-threads-8 + - name: transpose + benchmark_idx: 3 + uid: 
transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-naive values: - size: 1024 + dim: 512 + variant: naive mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 34 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 106 common: repetitions: 2 timeout: null @@ -85822,33 +45639,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-size-1024-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-size-1024-threads-8 - path: 
/home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-size-1024-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive values: - size: 1024 + dim: 512 + variant: naive mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 35 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 107 common: repetitions: 2 timeout: null @@ -85857,33 +45676,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-size-1024-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive/simulate + traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive values: - size: 1024 + dim: 512 + variant: naive mode: nondeterministic threads: 8 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 36 + - -variant=naive + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 108 common: repetitions: 2 timeout: null @@ -85892,33 +45713,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-8-variant-naive + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-8-variant-naive values: - size: 1024 + dim: 512 + variant: naive mode: nondeterministic threads: 8 run_ahead: 10 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 37 + - -variant=naive + - -dimX=512 + - 
-dimY=512 + - -repeat=0 + input_idx: 109 common: repetitions: 2 timeout: null @@ -85927,33 +45750,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-8-variant-naive/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-naive/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - size: 1024 - mode: nondeterministic - threads: 8 - run_ahead: 10 + dim: 512 + variant: coalesced + mode: serial + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 38 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 110 common: repetitions: 2 timeout: null @@ -85962,33 +45787,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: 
/home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - size: 1024 - mode: nondeterministic - threads: 8 - run_ahead: 10 + dim: 512 + variant: coalesced + mode: serial + threads: 4 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: true + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 39 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 111 common: repetitions: 2 timeout: null @@ -85997,33 +45824,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-true-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-size-1024-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-size-1024-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - size: 1024 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 + dim: 512 + variant: coalesced + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 40 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 112 common: repetitions: 2 timeout: null @@ -86032,68 +45861,72 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-size-1024-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-serial-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-size-1024-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-size-1024-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - size: 1024 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 + dim: 512 + variant: coalesced + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false + repeat: 1 args: - - --arraysize - - '1024' - - --numtimes - - '1' - input_idx: 41 
+ - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 113 common: repetitions: 2 timeout: null concurrency: 1 enabled: null - results_dir: /home/roman/dev/box/results - target: Simulate - target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-size-1024-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace - parallel: null - l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4 + results_dir: /home/roman/dev/box/results + target: Simulate + target_config: !Simulate + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-serial-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace + parallel: null + l2_prefill: null + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: 
/home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - size: 10240 - mode: serial + dim: 512 + variant: coalesced + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false + repeat: 1 args: - - --arraysize - - '10240' - - --numtimes - - '1' - input_idx: 42 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 114 common: repetitions: 2 timeout: null @@ -86102,33 +45935,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: 
/home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - size: 10240 - mode: serial + dim: 512 + variant: coalesced + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false + repeat: 1 args: - - --arraysize - - '10240' - - --numtimes - - '1' - input_idx: 43 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 115 common: repetitions: 2 timeout: null @@ -86137,33 +45972,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - size: 10240 - mode: serial + dim: 512 + variant: coalesced + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false + repeat: 1 args: - - --arraysize - - '10240' - - --numtimes - - '1' - input_idx: 44 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 116 common: repetitions: 2 timeout: null @@ -86172,33 +46009,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced values: - size: 10240 - mode: serial - threads: 4 + dim: 512 + variant: coalesced + mode: deterministic + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false + repeat: 1 args: - - --arraysize - - '10240' - - --numtimes - - '1' - input_idx: 45 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 117 common: repetitions: 2 
timeout: null @@ -86207,33 +46046,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-size-10240-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-size-10240-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced values: - 
size: 10240 - mode: serial - threads: 4 + dim: 512 + variant: coalesced + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false + repeat: 1 args: - - --arraysize - - '10240' - - --numtimes - - '1' - input_idx: 46 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 118 common: repetitions: 2 timeout: null @@ -86242,33 +46083,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-deterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-size-10240-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-size-10240-threads-4 + - name: transpose + benchmark_idx: 3 + uid: 
transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-coalesced values: - size: 10240 - mode: serial - threads: 4 + dim: 512 + variant: coalesced + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false + repeat: 1 args: - - --arraysize - - '10240' - - --numtimes - - '1' - input_idx: 47 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 119 common: repetitions: 2 timeout: null @@ -86277,33 +46120,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-deterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: 
babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - size: 10240 - mode: deterministic + dim: 512 + variant: coalesced + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false + repeat: 1 args: - - --arraysize - - '10240' - - --numtimes - - '1' - input_idx: 48 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 120 common: repetitions: 2 timeout: null @@ -86312,33 +46157,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - size: 10240 - mode: deterministic + dim: 512 + variant: coalesced + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false + repeat: 1 args: - - --arraysize - - '10240' - - --numtimes - - '1' - input_idx: 49 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 121 common: repetitions: 2 timeout: null @@ -86347,33 +46194,35 @@ benchmarks: results_dir: 
/home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced values: - size: 10240 - mode: deterministic 
+ dim: 512 + variant: coalesced + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false + repeat: 1 args: - - --arraysize - - '10240' - - --numtimes - - '1' - input_idx: 50 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 122 common: repetitions: 2 timeout: null @@ -86382,33 +46231,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 + - name: transpose + benchmark_idx: 3 + uid: 
transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced values: - size: 10240 - mode: deterministic + dim: 512 + variant: coalesced + mode: nondeterministic threads: 4 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false + repeat: 1 args: - - --arraysize - - '10240' - - --numtimes - - '1' - input_idx: 51 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 123 common: repetitions: 2 timeout: null @@ -86417,33 +46268,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: 
babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-size-10240-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-size-10240-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced values: - size: 10240 - mode: deterministic + dim: 512 + variant: coalesced + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + run_ahead: 10 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false + repeat: 1 args: - - --arraysize - - '10240' - - --numtimes - - '1' - input_idx: 52 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 124 common: repetitions: 2 timeout: null @@ -86452,33 +46305,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-size-10240-threads-4 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-size-10240-threads-4 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-4-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-4-variant-coalesced values: - size: 10240 - mode: deterministic + dim: 512 + variant: coalesced + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false + repeat: 1 args: - - --arraysize - - '10240' - - --numtimes - - '1' - input_idx: 53 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 125 common: repetitions: 2 timeout: null @@ -86487,33 +46342,35 @@ benchmarks: 
results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-4-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced values: - size: 10240 - mode: 
deterministic + dim: 512 + variant: coalesced + mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false + repeat: 1 args: - - --arraysize - - '10240' - - --numtimes - - '1' - input_idx: 54 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 126 common: repetitions: 2 timeout: null @@ -86522,33 +46379,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced 
+ path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced values: - size: 10240 - mode: deterministic + dim: 512 + variant: coalesced + mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false + repeat: 1 args: - - --arraysize - - '10240' - - --numtimes - - '1' - input_idx: 55 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 127 common: repetitions: 2 timeout: null @@ -86557,33 +46416,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: 
CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-coalesced values: - size: 10240 - mode: deterministic + dim: 512 + variant: coalesced + mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false + repeat: 1 args: - - --arraysize - - '10240' - - --numtimes - - '1' - input_idx: 56 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 128 common: repetitions: 2 timeout: null @@ -86592,33 +46453,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-5-threads-8-variant-coalesced/simulate + traces_dir: 
/home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced values: - size: 10240 - mode: deterministic + dim: 512 + variant: coalesced + mode: nondeterministic threads: 8 - run_ahead: 5 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false + repeat: 1 args: - - --arraysize - - '10240' - - --numtimes - - '1' - input_idx: 57 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 129 common: repetitions: 2 timeout: null @@ -86627,33 +46490,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-size-10240-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-size-10240-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced + path: /home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced values: - size: 10240 - mode: deterministic + dim: 512 + variant: coalesced + mode: nondeterministic threads: 8 - run_ahead: 
5 - num_clusters: 56 - cores_per_cluster: 1 + run_ahead: 10 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false + repeat: 1 args: - - --arraysize - - '10240' - - --numtimes - - '1' - input_idx: 58 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 130 common: repetitions: 2 timeout: null @@ -86662,33 +46527,35 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-size-10240-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-4-dim-512-memory_only-false-mode-nondeterministic-num_clusters-28-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null - - name: babelstream - benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-size-10240-threads-8 - path: /home/roman/dev/box/test-apps/BabelStream - rel_path: ./BabelStream - executable: CUDAStream - executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-size-10240-threads-8 + - name: transpose + benchmark_idx: 3 + uid: transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-8-variant-coalesced + path: 
/home/roman/dev/box/test-apps/transpose + rel_path: ./transpose + executable: transpose + executable_path: /home/roman/dev/box/test-apps/transpose/transpose + results_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-8-variant-coalesced values: - size: 10240 - mode: deterministic + dim: 512 + variant: coalesced + mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false + repeat: 1 args: - - --arraysize - - '10240' - - --numtimes - - '1' - input_idx: 59 + - -variant=coalesced + - -dimX=512 + - -dimY=512 + - -repeat=0 + input_idx: 131 common: repetitions: 2 timeout: null @@ -86697,22 +46564,23 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-size-10240-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/transpose/transpose-cores_per_cluster-1-dim-512-memory_only-false-mode-nondeterministic-num_clusters-112-repeat-1-run_ahead-10-threads-8-variant-coalesced/simulate + traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/trace + accelsim_traces_dir: /home/roman/dev/box/results/transpose/transpose-dim-512-repeat-1-variant-coalesced/accelsim-trace parallel: null l2_prefill: null + babelstream: - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 + uid: 
babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4 values: - size: 10240 - mode: nondeterministic + size: 1024 + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 @@ -86720,10 +46588,10 @@ benchmarks: memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 60 + input_idx: 0 common: repetitions: 2 timeout: null @@ -86732,22 +46600,22 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 + uid: 
babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4 values: - size: 10240 - mode: nondeterministic + size: 1024 + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 @@ -86755,10 +46623,10 @@ benchmarks: memory_only: true args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 61 + input_idx: 1 common: repetitions: 2 timeout: null @@ -86767,33 +46635,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 + uid: 
babelstream-cores_per_cluster-4-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4 values: - size: 10240 - mode: nondeterministic + size: 1024 + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 62 + input_idx: 2 common: repetitions: 2 timeout: null @@ -86802,33 +46670,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-1024-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 + uid: 
babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-size-1024-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-size-1024-threads-4 values: - size: 10240 - mode: nondeterministic + size: 1024 + mode: serial threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 63 + input_idx: 3 common: repetitions: 2 timeout: null @@ -86837,33 +46705,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-size-1024-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: 
babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-size-10240-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-size-10240-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 values: - size: 10240 - mode: nondeterministic + size: 1024 + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 64 + input_idx: 4 common: repetitions: 2 timeout: null @@ -86872,33 +46740,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: 
babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-size-10240-threads-4 + uid: babelstream-cores_per_cluster-4-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-size-10240-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 values: - size: 10240 - mode: nondeterministic + size: 1024 + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 65 + input_idx: 5 common: repetitions: 2 timeout: null @@ -86907,33 +46775,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - 
name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-size-1024-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-size-1024-threads-4 values: - size: 10240 - mode: nondeterministic + size: 1024 + mode: deterministic threads: 4 - run_ahead: 10 - num_clusters: 28 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 66 + input_idx: 6 common: repetitions: 2 timeout: null @@ -86942,33 +46810,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-size-1024-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: 
null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 values: - size: 10240 - mode: nondeterministic - threads: 4 - run_ahead: 10 + size: 1024 + mode: deterministic + threads: 8 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 67 + input_idx: 7 common: repetitions: 2 timeout: null @@ -86977,33 +46845,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-8/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: 
/home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-4 + uid: babelstream-cores_per_cluster-4-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 values: - size: 10240 - mode: nondeterministic - threads: 4 - run_ahead: 10 + size: 1024 + mode: deterministic + threads: 8 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 68 + input_idx: 8 common: repetitions: 2 timeout: null @@ -87012,33 +46880,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-1024-threads-8/simulate + traces_dir: 
/home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-size-1024-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-size-1024-threads-8 values: - size: 10240 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + size: 1024 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 69 + input_idx: 9 common: repetitions: 2 timeout: null @@ -87047,33 +46915,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: 
/home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-size-1024-threads-8/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-size-10240-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-size-10240-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 values: - size: 10240 + size: 1024 mode: nondeterministic threads: 4 - run_ahead: 10 - num_clusters: 56 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 70 + input_idx: 10 common: repetitions: 2 timeout: null @@ -87082,33 +46950,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + 
stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-size-10240-threads-4 + uid: babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-size-10240-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-4 values: - size: 10240 + size: 1024 mode: nondeterministic threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 71 + input_idx: 11 common: repetitions: 2 timeout: null @@ -87117,33 +46985,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-size-10240-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-size-1024-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-size-1024-threads-4 values: - size: 10240 + size: 1024 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 72 + input_idx: 12 common: repetitions: 2 timeout: null @@ -87152,33 +47020,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-size-1024-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-4 values: - size: 10240 + size: 1024 mode: nondeterministic - threads: 8 - run_ahead: 5 + threads: 4 + run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 73 + input_idx: 13 common: repetitions: 2 timeout: null @@ -87187,33 +47055,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-8/simulate - traces_dir: 
/home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 + uid: babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-4 values: - size: 10240 + size: 1024 mode: nondeterministic - threads: 8 - run_ahead: 5 + threads: 4 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 74 + input_idx: 14 common: repetitions: 2 timeout: null @@ -87222,33 +47090,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-size-1024-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-size-1024-threads-4 values: - size: 10240 + size: 1024 mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + threads: 4 + run_ahead: 10 + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 75 + input_idx: 15 common: repetitions: 2 timeout: null @@ -87257,33 +47125,33 @@ benchmarks: results_dir: 
/home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-size-1024-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-size-10240-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-size-10240-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 values: - size: 10240 + size: 1024 mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 76 + input_idx: 16 common: repetitions: 2 timeout: null @@ -87292,33 +47160,33 @@ benchmarks: results_dir: 
/home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-size-10240-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-8/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-size-10240-threads-8 + uid: babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-size-10240-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-8 values: - size: 10240 + size: 1024 mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 - memory_only: true + num_clusters: 28 + cores_per_cluster: 4 + memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 77 + input_idx: 17 common: repetitions: 2 timeout: null @@ -87327,33 +47195,33 
@@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-size-10240-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-1024-threads-8/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-size-1024-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-size-1024-threads-8 values: - size: 10240 + size: 1024 mode: nondeterministic threads: 8 - run_ahead: 10 - num_clusters: 28 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 78 + input_idx: 18 common: repetitions: 2 timeout: null @@ -87362,33 
+47230,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-size-1024-threads-8/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-8 values: - size: 10240 + size: 1024 mode: nondeterministic threads: 8 run_ahead: 10 num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 79 + input_idx: 19 common: repetitions: 2 timeout: null @@ -87397,33 +47265,33 
@@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-8/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-8 + uid: babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-8 values: - size: 10240 + size: 1024 mode: nondeterministic threads: 8 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 4 memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 80 + input_idx: 20 common: repetitions: 2 timeout: null @@ -87432,33 +47300,33 @@ 
benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-1024-threads-8/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-size-1024-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-size-1024-threads-8 values: - size: 10240 + size: 1024 mode: nondeterministic threads: 8 run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + num_clusters: 112 + cores_per_cluster: 1 + memory_only: false args: - --arraysize - - '10240' + - '1024' - --numtimes - '1' - input_idx: 81 + input_idx: 21 common: repetitions: 2 
timeout: null @@ -87467,25 +47335,25 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-size-1024-threads-8/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-1024/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-size-10240-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-size-10240-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4 values: size: 10240 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 memory_only: false args: @@ -87493,7 +47361,7 @@ benchmarks: - '10240' - --numtimes - '1' - input_idx: 82 + input_idx: 22 common: 
repetitions: 2 timeout: null @@ -87502,25 +47370,25 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-size-10240-threads-8/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-size-10240-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-size-10240-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4 values: size: 10240 - mode: nondeterministic - threads: 8 - run_ahead: 10 - num_clusters: 56 + mode: serial + threads: 4 + run_ahead: 5 + num_clusters: 28 cores_per_cluster: 1 memory_only: true args: @@ -87528,7 +47396,7 @@ benchmarks: - '10240' - --numtimes - '1' - input_idx: 83 + input_idx: 23 common: repetitions: 2 timeout: null @@ -87537,33 +47405,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-size-10240-threads-8/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4 + uid: babelstream-cores_per_cluster-4-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4 values: - size: 102400 + size: 10240 mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - --arraysize - - '102400' + - '10240' - --numtimes - '1' - input_idx: 84 + input_idx: 24 common: repetitions: 2 timeout: null @@ -87572,33 +47440,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-10240-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-size-10240-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-size-10240-threads-4 values: - size: 102400 + size: 10240 mode: serial threads: 4 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - --arraysize - - '102400' + - '10240' - --numtimes - '1' - input_idx: 85 + input_idx: 25 common: repetitions: 2 timeout: null @@ -87607,33 +47475,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-size-10240-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 values: - size: 102400 - mode: serial + size: 10240 + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - --arraysize - - '102400' + - '10240' - --numtimes - '1' - input_idx: 86 + input_idx: 26 common: repetitions: 2 timeout: null @@ -87642,33 +47510,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4 + uid: babelstream-cores_per_cluster-4-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 values: - size: 102400 - mode: serial + size: 10240 + mode: deterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - --arraysize - - '102400' + - '10240' - --numtimes - '1' - input_idx: 87 + input_idx: 27 common: repetitions: 2 timeout: null @@ -87677,33 +47545,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-size-102400-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-size-10240-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-size-10240-threads-4 values: - size: 102400 - mode: serial + size: 10240 + mode: deterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - --arraysize - - '102400' + - '10240' - --numtimes - '1' - input_idx: 88 + input_idx: 28 common: repetitions: 2 timeout: null @@ -87712,33 +47580,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-56-run_ahead-5-size-102400-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace - accelsim_traces_dir: 
/home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-size-10240-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-size-102400-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 values: - size: 102400 - mode: serial - threads: 4 + size: 10240 + mode: deterministic + threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - --arraysize - - '102400' + - '10240' - --numtimes - '1' - input_idx: 89 + input_idx: 29 common: repetitions: 2 timeout: null @@ -87747,33 +47615,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-56-run_ahead-5-size-102400-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-8/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 + uid: babelstream-cores_per_cluster-4-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 values: - size: 102400 + size: 10240 mode: deterministic - threads: 4 + threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - --arraysize - - '102400' + - '10240' - --numtimes - '1' - input_idx: 90 + input_idx: 30 common: repetitions: 2 timeout: null @@ -87782,33 +47650,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace - 
accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-10240-threads-8/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-size-10240-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-size-10240-threads-8 values: - size: 102400 + size: 10240 mode: deterministic - threads: 4 + threads: 8 run_ahead: 5 - num_clusters: 28 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - --arraysize - - '102400' + - '10240' - --numtimes - '1' - input_idx: 91 + input_idx: 31 common: repetitions: 2 timeout: null @@ -87817,33 +47685,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-4/simulate - traces_dir: 
/home/roman/dev/box/results/babelstream/babelstream-size-102400/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-size-10240-threads-8/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 values: - size: 102400 - mode: deterministic + size: 10240 + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - --arraysize - - '102400' + - '10240' - --numtimes - '1' - input_idx: 92 + input_idx: 32 common: repetitions: 2 timeout: null @@ -87852,33 +47720,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-4/simulate 
- traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 + uid: babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-4 values: - size: 102400 - mode: deterministic + size: 10240 + mode: nondeterministic threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - --arraysize - - '102400' + - '10240' - --numtimes - '1' - input_idx: 93 + input_idx: 33 common: repetitions: 2 timeout: null @@ -87887,33 +47755,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-size-102400-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-size-10240-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-size-10240-threads-4 values: - size: 102400 - mode: deterministic + size: 10240 + mode: nondeterministic threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 memory_only: false args: - --arraysize - - '102400' + - '10240' - --numtimes - '1' - input_idx: 94 + input_idx: 34 common: repetitions: 2 timeout: null @@ -87922,33 +47790,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: 
!Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-size-102400-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-size-10240-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-size-102400-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-4 values: - size: 102400 - mode: deterministic + size: 10240 + mode: nondeterministic threads: 4 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - --arraysize - - '102400' + - '10240' - --numtimes - '1' - input_idx: 95 + input_idx: 35 common: repetitions: 2 timeout: null @@ -87957,33 +47825,33 @@ benchmarks: results_dir: 
/home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-size-102400-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 + uid: babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-4 values: - size: 102400 - mode: deterministic - threads: 8 - run_ahead: 5 + size: 10240 + mode: nondeterministic + threads: 4 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - --arraysize - - '102400' + - '10240' - --numtimes - '1' - input_idx: 96 + input_idx: 36 common: repetitions: 2 timeout: null 
@@ -87992,33 +47860,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-size-10240-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-size-10240-threads-4 values: - size: 102400 - mode: deterministic - threads: 8 - run_ahead: 5 - num_clusters: 28 + size: 10240 + mode: nondeterministic + threads: 4 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - --arraysize - - '102400' + - '10240' - --numtimes - 
'1' - input_idx: 97 + input_idx: 37 common: repetitions: 2 timeout: null @@ -88027,33 +47895,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-size-10240-threads-4/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 values: - size: 102400 - mode: deterministic + size: 10240 + mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - --arraysize - - '102400' + - 
'10240' - --numtimes - '1' - input_idx: 98 + input_idx: 38 common: repetitions: 2 timeout: null @@ -88062,33 +47930,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-8/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 + uid: babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-8 values: - size: 102400 - mode: deterministic + size: 10240 + mode: nondeterministic threads: 8 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 4 + memory_only: false 
args: - --arraysize - - '102400' + - '10240' - --numtimes - '1' - input_idx: 99 + input_idx: 39 common: repetitions: 2 timeout: null @@ -88097,33 +47965,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-10240-threads-8/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-size-102400-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-size-10240-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-size-102400-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-size-10240-threads-8 values: - size: 102400 - mode: deterministic + size: 10240 + mode: nondeterministic threads: 8 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 
memory_only: false args: - --arraysize - - '102400' + - '10240' - --numtimes - '1' - input_idx: 100 + input_idx: 40 common: repetitions: 2 timeout: null @@ -88132,33 +48000,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-56-run_ahead-5-size-102400-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-size-10240-threads-8/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-size-102400-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-size-102400-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-8 values: - size: 102400 - mode: deterministic + size: 10240 + mode: nondeterministic threads: 8 - run_ahead: 5 - num_clusters: 56 + run_ahead: 10 + 
num_clusters: 28 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - --arraysize - - '102400' + - '10240' - --numtimes - '1' - input_idx: 101 + input_idx: 41 common: repetitions: 2 timeout: null @@ -88167,33 +48035,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-deterministic-num_clusters-56-run_ahead-5-size-102400-threads-8/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-8/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 + uid: babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-8 values: - size: 102400 + size: 10240 mode: nondeterministic - threads: 4 - run_ahead: 
5 + threads: 8 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 1 + cores_per_cluster: 4 memory_only: false args: - --arraysize - - '102400' + - '10240' - --numtimes - '1' - input_idx: 102 + input_idx: 42 common: repetitions: 2 timeout: null @@ -88202,33 +48070,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-10240-threads-8/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-size-10240-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-size-10240-threads-8 values: - size: 102400 + size: 10240 mode: 
nondeterministic - threads: 4 - run_ahead: 5 - num_clusters: 28 + threads: 8 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - --arraysize - - '102400' + - '10240' - --numtimes - '1' - input_idx: 103 + input_idx: 43 common: repetitions: 2 timeout: null @@ -88237,33 +48105,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-4/simulate - traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace - accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-size-10240-threads-8/simulate + traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/trace + accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-10240/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4 
values: size: 102400 - mode: nondeterministic + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: false args: - --arraysize - '102400' - --numtimes - '1' - input_idx: 104 + input_idx: 44 common: repetitions: 2 timeout: null @@ -88272,33 +48140,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-4/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4 values: size: 102400 - mode: nondeterministic + mode: serial threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 + cores_per_cluster: 1 memory_only: true args: - --arraysize - '102400' - --numtimes - '1' - input_idx: 
105 + input_idx: 45 common: repetitions: 2 timeout: null @@ -88307,33 +48175,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-4/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-size-102400-threads-4 + uid: babelstream-cores_per_cluster-4-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4 values: size: 102400 - mode: nondeterministic + mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - --arraysize - '102400' - --numtimes - '1' - input_idx: 106 + input_idx: 46 common: repetitions: 2 timeout: null @@ -88342,33 +48210,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-size-102400-threads-4/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-serial-num_clusters-28-run_ahead-5-size-102400-threads-4/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-size-102400-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-size-102400-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-size-102400-threads-4 values: size: 102400 - mode: nondeterministic + mode: serial threads: 4 run_ahead: 5 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - --arraysize - '102400' - --numtimes - '1' - input_idx: 107 + input_idx: 47 common: repetitions: 2 timeout: null @@ -88377,24 +48245,24 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-size-102400-threads-4/simulate + stats_dir: 
/home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-serial-num_clusters-112-run_ahead-5-size-102400-threads-4/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 values: size: 102400 - mode: nondeterministic + mode: deterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false @@ -88403,7 +48271,7 @@ benchmarks: - '102400' - --numtimes - '1' - input_idx: 108 + input_idx: 48 common: repetitions: 2 timeout: null @@ -88412,33 +48280,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-4/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-4/simulate traces_dir: 
/home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-4 + uid: babelstream-cores_per_cluster-4-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 values: size: 102400 - mode: nondeterministic + mode: deterministic threads: 4 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - --arraysize - '102400' - --numtimes - '1' - input_idx: 109 + input_idx: 49 common: repetitions: 2 timeout: null @@ -88447,33 +48315,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-4/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-4/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: 
/home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-size-102400-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-size-102400-threads-4 values: size: 102400 - mode: nondeterministic + mode: deterministic threads: 4 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - --arraysize - '102400' - --numtimes - '1' - input_idx: 110 + input_idx: 50 common: repetitions: 2 timeout: null @@ -88482,33 +48350,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-4/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-size-102400-threads-4/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - 
uid: babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 values: size: 102400 - mode: nondeterministic - threads: 4 - run_ahead: 10 + mode: deterministic + threads: 8 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - --arraysize - '102400' - --numtimes - '1' - input_idx: 111 + input_idx: 51 common: repetitions: 2 timeout: null @@ -88517,33 +48385,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-4/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-8/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-size-102400-threads-4 + uid: 
babelstream-cores_per_cluster-4-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 values: size: 102400 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - --arraysize - '102400' - --numtimes - '1' - input_idx: 112 + input_idx: 52 common: repetitions: 2 timeout: null @@ -88552,33 +48420,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-size-102400-threads-4/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-deterministic-num_clusters-28-run_ahead-5-size-102400-threads-8/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-size-102400-threads-4 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-size-102400-threads-8 path: 
/home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-size-102400-threads-4 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-size-102400-threads-8 values: size: 102400 - mode: nondeterministic - threads: 4 - run_ahead: 10 - num_clusters: 56 + mode: deterministic + threads: 8 + run_ahead: 5 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - --arraysize - '102400' - --numtimes - '1' - input_idx: 113 + input_idx: 53 common: repetitions: 2 timeout: null @@ -88587,23 +48455,23 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-size-102400-threads-4/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-deterministic-num_clusters-112-run_ahead-5-size-102400-threads-8/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: 
/home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 values: size: 102400 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 @@ -88613,7 +48481,7 @@ benchmarks: - '102400' - --numtimes - '1' - input_idx: 114 + input_idx: 54 common: repetitions: 2 timeout: null @@ -88622,33 +48490,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-8/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-4/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 + uid: babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: 
/home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-4 values: size: 102400 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - --arraysize - '102400' - --numtimes - '1' - input_idx: 115 + input_idx: 55 common: repetitions: 2 timeout: null @@ -88657,33 +48525,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-8/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-4/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-size-102400-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 + results_dir: 
/home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-size-102400-threads-4 values: size: 102400 mode: nondeterministic - threads: 8 + threads: 4 run_ahead: 5 - num_clusters: 28 - cores_per_cluster: 8 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - --arraysize - '102400' - --numtimes - '1' - input_idx: 116 + input_idx: 56 common: repetitions: 2 timeout: null @@ -88692,33 +48560,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-8/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-size-102400-threads-4/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-4 values: 
size: 102400 mode: nondeterministic - threads: 8 - run_ahead: 5 + threads: 4 + run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - --arraysize - '102400' - --numtimes - '1' - input_idx: 117 + input_idx: 57 common: repetitions: 2 timeout: null @@ -88727,33 +48595,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-8/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-4/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-size-102400-threads-8 + uid: babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-size-102400-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-4 values: size: 102400 mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 - cores_per_cluster: 1 + threads: 4 + run_ahead: 10 + num_clusters: 28 + 
cores_per_cluster: 4 memory_only: false args: - --arraysize - '102400' - --numtimes - '1' - input_idx: 118 + input_idx: 58 common: repetitions: 2 timeout: null @@ -88762,33 +48630,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-5-size-102400-threads-8/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-4/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-size-102400-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-size-102400-threads-4 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-size-102400-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-size-102400-threads-4 values: size: 102400 mode: nondeterministic - threads: 8 - run_ahead: 5 - num_clusters: 56 + threads: 4 + run_ahead: 10 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - --arraysize - '102400' - --numtimes - '1' - input_idx: 119 + input_idx: 59 common: repetitions: 2 timeout: 
null @@ -88797,24 +48665,24 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-5-size-102400-threads-8/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-size-102400-threads-4/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 values: size: 102400 mode: nondeterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 cores_per_cluster: 1 memory_only: false @@ -88823,7 +48691,7 @@ benchmarks: - '102400' - --numtimes - '1' - input_idx: 120 + input_idx: 60 common: repetitions: 2 timeout: null @@ -88832,33 +48700,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: 
/home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-8/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-8/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-8 + uid: babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-8 values: size: 102400 mode: nondeterministic threads: 8 - run_ahead: 10 + run_ahead: 5 num_clusters: 28 - cores_per_cluster: 1 - memory_only: true + cores_per_cluster: 4 + memory_only: false args: - --arraysize - '102400' - --numtimes - '1' - input_idx: 121 + input_idx: 61 common: repetitions: 2 timeout: null @@ -88867,33 +48735,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-8/simulate 
+ stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-5-size-102400-threads-8/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-size-102400-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-size-102400-threads-8 values: size: 102400 mode: nondeterministic threads: 8 - run_ahead: 10 - num_clusters: 28 - cores_per_cluster: 8 + run_ahead: 5 + num_clusters: 112 + cores_per_cluster: 1 memory_only: false args: - --arraysize - '102400' - --numtimes - '1' - input_idx: 122 + input_idx: 62 common: repetitions: 2 timeout: null @@ -88902,33 +48770,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-8/simulate + stats_dir: 
/home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-5-size-102400-threads-8/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-8 values: size: 102400 mode: nondeterministic threads: 8 run_ahead: 10 num_clusters: 28 - cores_per_cluster: 8 - memory_only: true + cores_per_cluster: 1 + memory_only: false args: - --arraysize - '102400' - --numtimes - '1' - input_idx: 123 + input_idx: 63 common: repetitions: 2 timeout: null @@ -88937,33 +48805,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-8-memory_only-true-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-8/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-8/simulate traces_dir: 
/home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-size-102400-threads-8 + uid: babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-size-102400-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-8 values: size: 102400 mode: nondeterministic threads: 8 run_ahead: 10 - num_clusters: 56 - cores_per_cluster: 1 + num_clusters: 28 + cores_per_cluster: 4 memory_only: false args: - --arraysize - '102400' - --numtimes - '1' - input_idx: 124 + input_idx: 64 common: repetitions: 2 timeout: null @@ -88972,33 +48840,33 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-56-run_ahead-10-size-102400-threads-8/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-4-memory_only-false-mode-nondeterministic-num_clusters-28-run_ahead-10-size-102400-threads-8/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null 
l2_prefill: null - name: babelstream benchmark_idx: 4 - uid: babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-size-102400-threads-8 + uid: babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-size-102400-threads-8 path: /home/roman/dev/box/test-apps/BabelStream rel_path: ./BabelStream executable: CUDAStream executable_path: /home/roman/dev/box/test-apps/BabelStream/CUDAStream - results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-size-102400-threads-8 + results_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-size-102400-threads-8 values: size: 102400 mode: nondeterministic threads: 8 run_ahead: 10 - num_clusters: 56 + num_clusters: 112 cores_per_cluster: 1 - memory_only: true + memory_only: false args: - --arraysize - '102400' - --numtimes - '1' - input_idx: 125 + input_idx: 65 common: repetitions: 2 timeout: null @@ -89007,7 +48875,7 @@ benchmarks: results_dir: /home/roman/dev/box/results target: Simulate target_config: !Simulate - stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-true-mode-nondeterministic-num_clusters-56-run_ahead-10-size-102400-threads-8/simulate + stats_dir: /home/roman/dev/box/results/babelstream/babelstream-cores_per_cluster-1-memory_only-false-mode-nondeterministic-num_clusters-112-run_ahead-10-size-102400-threads-8/simulate traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/trace accelsim_traces_dir: /home/roman/dev/box/results/babelstream/babelstream-size-102400/accelsim-trace parallel: null diff --git a/test-apps/test-apps.yml b/test-apps/test-apps.yml index 72b8c623..695113b4 100644 --- a/test-apps/test-apps.yml +++ b/test-apps/test-apps.yml @@ -28,13 +28,11 @@ config: 
keep_log_file: true # for simulation, we do not set a limit on concurrency exec_simulate: - # No concurrency limit for now (until we implement multi-threading) - concurrency: 1 # null + concurrency: 1 repetitions: 1 # l2_prefill: true simulate: - # No concurrency limit for now (until we implement multi-threading) - concurrency: 1 # null + concurrency: 1 repetitions: 2 # l2_prefill: true # this is added to all inputs @@ -45,22 +43,38 @@ config: - nondeterministic # - nondeterministic_interleave threads: [4, 8] + # run_ahead: [10] run_ahead: [5, 10] - num_clusters: [28, 56] + # num_clusters: [28] + # scaling num clusters works well + num_clusters: [28, 112] # cores_per_cluster: [1, 4, 8] - cores_per_cluster: [1, 8] + # scaling cores per cluster does not work well + cores_per_cluster: [1, 4] memory_only: [false, true] exclude: + # do not run serial for more threads - mode: serial threads: 8 + # do not run for different run ahead for serial and deterministic - mode: serial run_ahead: 10 - mode: deterministic run_ahead: 10 - - num_clusters: 56 - cores_per_cluster: 8 - - num_clusters: 56 - cores_per_cluster: 8 + # do not run memory only for anything but the baseline + - mode: deterministic + memory_only: true + - mode: nondeterministic + memory_only: true + - mode: serial + cores_per_cluster: 4 + memory_only: true + - mode: serial + num_clusters: 112 + memory_only: true + # do not run for 448 cores + - num_clusters: 112 + cores_per_cluster: 4 # for accelsim simulation, we do not set a limit on concurrency accelsim_simulate: # No concurrency limit as accelsim is single threaded.