From 75ebae2246d806fb6a0f2e67b46e350284db8172 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 11 Oct 2024 12:54:00 +0200 Subject: [PATCH 1/5] [ci] add benchmark tests with complex types --- benchmark/test/CMakeLists.txt | 2 +- benchmark/test/blas.py | 8 ++++++++ benchmark/test/conversion.py | 8 ++++++++ benchmark/test/multi_vector_distributed.py | 9 +++++++++ benchmark/test/preconditioner.py | 8 ++++++++ benchmark/test/solver.py | 8 ++++++++ benchmark/test/solver_distributed.py | 11 +++++++++++ benchmark/test/sparse_blas.py | 9 +++++++++ benchmark/test/spmv.py | 8 ++++++++ benchmark/test/spmv_distributed.py | 9 +++++++++ benchmark/test/test_framework.py.in | 19 ++++++++++++------- 11 files changed, 91 insertions(+), 8 deletions(-) diff --git a/benchmark/test/CMakeLists.txt b/benchmark/test/CMakeLists.txt index 2f43b6eaf71..c0f7bf26fd4 100644 --- a/benchmark/test/CMakeLists.txt +++ b/benchmark/test/CMakeLists.txt @@ -9,7 +9,7 @@ function(add_benchmark_test test_name) COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.py $ --generate COMMENT "Regenerating reference output for ${test_name}" WORKING_DIRECTORY "$") - add_dependencies(${regenerate_target} ${test_name}) + add_dependencies(${regenerate_target} ${test_name} ${test_name}_dcomplex) add_dependencies(benchmark_test_regenerate ${regenerate_target}) endfunction() add_custom_target(benchmark_test_regenerate) diff --git a/benchmark/test/blas.py b/benchmark/test/blas.py index ff5bddc5d08..98a775c4992 100755 --- a/benchmark/test/blas.py +++ b/benchmark/test/blas.py @@ -30,3 +30,11 @@ expected_stdout="blas.profile.stdout", expected_stderr="blas.profile.stderr", ) + +# complex +test_framework.compare_output( + ["-input", '[{"n": 100}]'], + expected_stdout="blas_dcomplex.simple.stdout", + expected_stderr="blas_dcomplex.simple.stderr", + use_complex=True +) \ No newline at end of file diff --git a/benchmark/test/conversion.py b/benchmark/test/conversion.py index 2eada100731..ceca02f708e 100755 --- a/benchmark/test/conversion.py +++ b/benchmark/test/conversion.py @@ -67,3 +67,11 @@ expected_stdout="conversion.profile.stdout", expected_stderr="conversion.profile.stderr", ) + +# complex +test_framework.compare_output( + ["-input", '[{"size": 100, "stencil": "7pt"}]', "-formats", "coo,csr"], + expected_stdout="conversion_dcomplex.simple.stdout", + expected_stderr="conversion_dcomplex.simple.stderr", + use_complex=True +) diff --git a/benchmark/test/multi_vector_distributed.py b/benchmark/test/multi_vector_distributed.py index c62cb8ebd17..aff2e2b21ee 100644 --- a/benchmark/test/multi_vector_distributed.py +++ b/benchmark/test/multi_vector_distributed.py @@ -34,3 +34,12 @@ expected_stderr="multi_vector_distributed.profile.stderr", num_procs=3, ) + +# complex +test_framework.compare_output_distributed( + ["-input", '[{"n": 100}]'], + expected_stdout="multi_vector_distributed_dcomplex.simple.stdout", + expected_stderr="multi_vector_distributed_dcomplex.simple.stderr", + num_procs=3, + use_complex=True +) diff --git a/benchmark/test/preconditioner.py b/benchmark/test/preconditioner.py index 25c4adb8c5a..2d1b3bd2716 100755 --- a/benchmark/test/preconditioner.py +++ b/benchmark/test/preconditioner.py @@ -66,3 +66,11 @@ expected_stderr="preconditioner.reordered.stderr", stdin='[{"size": 100, "stencil": "7pt"}]', ) + +# complex +test_framework.compare_output( + ["-input", '[{"size": 100, "stencil": "7pt"}]'], + expected_stdout="preconditioner_dcomplex.simple.stdout", + expected_stderr="preconditioner_dcomplex.simple.stderr", + use_complex=True +) diff --git a/benchmark/test/solver.py b/benchmark/test/solver.py index 5dd1d840a4e..5ce0002df2e 100755 --- a/benchmark/test/solver.py +++ b/benchmark/test/solver.py @@ -51,3 +51,11 @@ expected_stderr="solver.reordered.stderr", stdin='[{"size": 100, "stencil": "7pt", "optimal": {"spmv": "csr"}}]', ) + +# complex input +test_framework.compare_output( + ["-input", '[{"size": 100, "stencil": "7pt", "optimal": {"spmv": "csr"}}]'], + expected_stdout="solver_dcomplex.simple.stdout", + expected_stderr="solver_dcomplex.simple.stderr", + use_complex=True +) diff --git a/benchmark/test/solver_distributed.py b/benchmark/test/solver_distributed.py index 54bbb030077..6c6efb653ef 100644 --- a/benchmark/test/solver_distributed.py +++ b/benchmark/test/solver_distributed.py @@ -46,3 +46,14 @@ expected_stdout="distributed_solver.profile.stdout", expected_stderr="distributed_solver.profile.stderr", ) + +# complex +test_framework.compare_output( + [ + "-input", + '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil", "optimal": {"spmv": "csr-csr"}}]', + ], + expected_stdout="distributed_solver_dcomplex.simple.stdout", + expected_stderr="distributed_solver_dcomplex.simple.stderr", + use_complex=True +) diff --git a/benchmark/test/sparse_blas.py b/benchmark/test/sparse_blas.py index 8e6cda3c9bd..05ee84aade0 100755 --- a/benchmark/test/sparse_blas.py +++ b/benchmark/test/sparse_blas.py @@ -64,3 +64,12 @@ expected_stderr="sparse_blas.reordered.stderr", stdin='[{"size": 100, "stencil": "7pt"}]', ) + +# complex +test_framework.compare_output( + ["-operations", "transpose", "-input", + '[{"size": 100, "stencil": "7pt"}]'], + expected_stdout="sparse_blas_dcomplex.simple.stdout", + expected_stderr="sparse_blas_dcomplex.simple.stderr", + use_complex=True +) diff --git a/benchmark/test/spmv.py b/benchmark/test/spmv.py index f6f4a4b5c39..d447490802a 100755 --- a/benchmark/test/spmv.py +++ b/benchmark/test/spmv.py @@ -51,3 +51,11 @@ expected_stderr="spmv.reordered.stderr", stdin='[{"size": 100, "stencil": "7pt"}]', ) + +# complex +test_framework.compare_output( + ["-input", '[{"size": 100, "stencil": "7pt"}]'], + expected_stdout="spmv_dcomplex.simple.stdout", + expected_stderr="spmv_dcomplex.simple.stderr", + use_complex=True +) diff --git a/benchmark/test/spmv_distributed.py b/benchmark/test/spmv_distributed.py index 356db48459e..11922f7ae8d 100644 --- a/benchmark/test/spmv_distributed.py +++ b/benchmark/test/spmv_distributed.py @@ -40,3 +40,12 @@ expected_stderr="spmv_distributed.profile.stderr", num_procs=3, ) + +# complex +test_framework.compare_output_distributed( + ["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]'], + expected_stdout="spmv_distributed_dcomplex.simple.stdout", + expected_stderr="spmv_distributed_dcomplex.simple.stderr", + num_procs=3, + use_complex=True +) diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index 9294b2f02ec..9f2e5718c05 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -135,8 +135,11 @@ def compare_output_impl( expected_stderr: str, stdin: str, launcher_flags: List[str], + use_complex: bool ): - args = [sys.argv[1]] + args + base_file = sys.argv[1] + file = base_file if not use_complex else f"{base_file}_dcomplex" + args = [file] + args expected_stdout = str(sourcepath / "reference" / expected_stdout) expected_stderr = str(sourcepath / "reference" / expected_stderr) result = subprocess.run( @@ -211,7 +214,7 @@ def compare_output_impl( def compare_output( - args: List[str], expected_stdout: str, expected_stderr: str, stdin: str = "" + args: List[str], *, expected_stdout: str, expected_stderr: str, stdin: str = "", use_complex: bool = False ): compare_output_impl( args, @@ -219,16 +222,18 @@ def compare_output( expected_stderr=expected_stderr, stdin=stdin, launcher_flags=[], + use_complex=use_complex ) def compare_output_distributed( - args, expected_stdout, expected_stderr, num_procs, stdin="" + args: List[str], *, expected_stdout: str, expected_stderr: str, num_procs: int, stdin: str = "", use_complex: bool = False ): compare_output_impl( args, - expected_stdout, - expected_stderr, - stdin, - ["@MPIEXEC_EXECUTABLE@", "@MPIEXEC_NUMPROC_FLAG@", str(num_procs)], + expected_stdout=expected_stdout, + expected_stderr=expected_stderr, + stdin=stdin, + launcher_flags=["@MPIEXEC_EXECUTABLE@", "@MPIEXEC_NUMPROC_FLAG@", str(num_procs)], + use_complex=use_complex ) From 463812808a9ee9406202e71ae1a583b0020feb90 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 11 Oct 2024 12:55:52 +0200 Subject: [PATCH 2/5] [ci] add generated benchmark test reference --- .../reference/blas_dcomplex.simple.stderr | 8 +++ .../reference/blas_dcomplex.simple.stdout | 28 +++++++++ .../conversion_dcomplex.simple.stderr | 10 ++++ .../conversion_dcomplex.simple.stdout | 31 ++++++++++ .../distributed_solver_dcomplex.simple.stderr | 8 +++ .../distributed_solver_dcomplex.simple.stdout | 59 +++++++++++++++++++ ..._vector_distributed_dcomplex.simple.stderr | 8 +++ ..._vector_distributed_dcomplex.simple.stdout | 28 +++++++++ .../preconditioner_dcomplex.simple.stderr | 7 +++ .../preconditioner_dcomplex.simple.stdout | 32 ++++++++++ .../reference/solver_dcomplex.simple.stderr | 8 +++ .../reference/solver_dcomplex.simple.stdout | 56 ++++++++++++++++++ .../sparse_blas_dcomplex.simple.stderr | 7 +++ .../sparse_blas_dcomplex.simple.stdout | 25 ++++++++ .../reference/spmv_dcomplex.simple.stderr | 8 +++ .../reference/spmv_dcomplex.simple.stdout | 21 +++++++ .../spmv_distributed_dcomplex.simple.stderr | 8 +++ .../spmv_distributed_dcomplex.simple.stdout | 22 +++++++ 18 files changed, 374 insertions(+) create mode 100644 benchmark/test/reference/blas_dcomplex.simple.stderr create mode 100644 benchmark/test/reference/blas_dcomplex.simple.stdout create mode 100644 benchmark/test/reference/conversion_dcomplex.simple.stderr create mode 100644 benchmark/test/reference/conversion_dcomplex.simple.stdout create mode 100644 benchmark/test/reference/distributed_solver_dcomplex.simple.stderr create mode 100644 benchmark/test/reference/distributed_solver_dcomplex.simple.stdout create mode 100644 benchmark/test/reference/multi_vector_distributed_dcomplex.simple.stderr create mode 100644 benchmark/test/reference/multi_vector_distributed_dcomplex.simple.stdout create mode 100644 benchmark/test/reference/preconditioner_dcomplex.simple.stderr create mode 100644 benchmark/test/reference/preconditioner_dcomplex.simple.stdout create mode 100644 benchmark/test/reference/solver_dcomplex.simple.stderr create mode 100644 benchmark/test/reference/solver_dcomplex.simple.stdout create mode 100644 benchmark/test/reference/sparse_blas_dcomplex.simple.stderr create mode 100644 benchmark/test/reference/sparse_blas_dcomplex.simple.stdout create mode 100644 benchmark/test/reference/spmv_dcomplex.simple.stderr create mode 100644 benchmark/test/reference/spmv_dcomplex.simple.stdout create mode 100644 benchmark/test/reference/spmv_distributed_dcomplex.simple.stderr create mode 100644 benchmark/test/reference/spmv_distributed_dcomplex.simple.stdout diff --git a/benchmark/test/reference/blas_dcomplex.simple.stderr b/benchmark/test/reference/blas_dcomplex.simple.stderr new file mode 100644 index 00000000000..ff505a3f1c9 --- /dev/null +++ b/benchmark/test/reference/blas_dcomplex.simple.stderr @@ -0,0 +1,8 @@ +Running on ReferenceExecutor +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The operations are copy,axpy,scal +Running test case n = 100 + Running blas: copy + Running blas: axpy + Running blas: scal diff --git a/benchmark/test/reference/blas_dcomplex.simple.stdout b/benchmark/test/reference/blas_dcomplex.simple.stdout new file mode 100644 index 00000000000..54745d81104 --- /dev/null +++ b/benchmark/test/reference/blas_dcomplex.simple.stdout @@ -0,0 +1,28 @@ +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + }, + "axpy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + }, + "scal": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/conversion_dcomplex.simple.stderr b/benchmark/test/reference/conversion_dcomplex.simple.stderr new file mode 100644 index 00000000000..23a27c4372a --- /dev/null +++ b/benchmark/test/reference/conversion_dcomplex.simple.stderr @@ -0,0 +1,10 @@ +Running on ReferenceExecutor +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The formats are coo,csr +Running test case stencil(100, 7pt) +Matrix is of size (125, 125), 725 + Running conversion: coo-read + Running conversion: coo-csr + Running conversion: csr-read + Running conversion: csr-coo diff --git a/benchmark/test/reference/conversion_dcomplex.simple.stdout b/benchmark/test/reference/conversion_dcomplex.simple.stdout new file mode 100644 index 00000000000..91b69b8a248 --- /dev/null +++ b/benchmark/test/reference/conversion_dcomplex.simple.stdout @@ -0,0 +1,31 @@ +[ + { + "size": 100, + "stencil": "7pt", + "conversion": { + "coo-read": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-read": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 + } +] diff --git a/benchmark/test/reference/distributed_solver_dcomplex.simple.stderr b/benchmark/test/reference/distributed_solver_dcomplex.simple.stderr new file mode 100644 index 00000000000..9d4b1f7094e --- /dev/null +++ b/benchmark/test/reference/distributed_solver_dcomplex.simple.stderr @@ -0,0 +1,8 @@ +Running on ReferenceExecutor +Running with 2 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +Running cg with 1000 iterations and residual goal of 1.000000e-06 +The number of right hand sides is 1 +Running test case stencil(100, 7pt, stencil) +Matrix is of size (125, 125) + Running solver: cg diff --git a/benchmark/test/reference/distributed_solver_dcomplex.simple.stdout b/benchmark/test/reference/distributed_solver_dcomplex.simple.stdout new file mode 100644 index 00000000000..458115e6ab2 --- /dev/null +++ b/benchmark/test/reference/distributed_solver_dcomplex.simple.stdout @@ -0,0 +1,59 @@ +[ + { + "size": 100, + "stencil": "7pt", + "comm_pattern": "stencil", + "optimal": { + "spmv": "csr-csr" + }, + "solver": { + "cg": { + "recurrent_residuals": [], + "true_residuals": [], + "implicit_residuals": [], + "iteration_timestamps": [], + "rhs_norm": 1.0, + "generate": { + "components": { + "generate()": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "time": 1.0 + }, + "apply": { + "components": { + "apply()": 1.0, + "iteration": 1.0, + "allocate": 1.0, + "dense::fill": 1.0, + "cg::initialize": 1.0, + "advanced_apply()": 1.0, + "dense::row_gather": 1.0, + "csr::advanced_spmv": 1.0, + "dense::compute_squared_norm2": 1.0, + "dense::compute_sqrt": 1.0, + "copy()": 1.0, + "dense::copy": 1.0, + "dense::compute_conj_dot_dispatch": 1.0, + "check()": 1.0, + "residual_norm::residual_norm": 1.0, + "cg::step_1": 1.0, + "csr::spmv": 1.0, + "cg::step_2": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "iterations": 7, + "time": 1.0 + }, + "preconditioner": {}, + "residual_norm": 1.0, + "repetitions": 1, + "completed": true + } + }, + "rows": 125, + "cols": 125 + } +] diff --git a/benchmark/test/reference/multi_vector_distributed_dcomplex.simple.stderr b/benchmark/test/reference/multi_vector_distributed_dcomplex.simple.stderr new file mode 100644 index 00000000000..ff505a3f1c9 --- /dev/null +++ b/benchmark/test/reference/multi_vector_distributed_dcomplex.simple.stderr @@ -0,0 +1,8 @@ +Running on ReferenceExecutor +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The operations are copy,axpy,scal +Running test case n = 100 + Running blas: copy + Running blas: axpy + Running blas: scal diff --git a/benchmark/test/reference/multi_vector_distributed_dcomplex.simple.stdout b/benchmark/test/reference/multi_vector_distributed_dcomplex.simple.stdout new file mode 100644 index 00000000000..54745d81104 --- /dev/null +++ b/benchmark/test/reference/multi_vector_distributed_dcomplex.simple.stdout @@ -0,0 +1,28 @@ +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + }, + "axpy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + }, + "scal": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/preconditioner_dcomplex.simple.stderr b/benchmark/test/reference/preconditioner_dcomplex.simple.stderr new file mode 100644 index 00000000000..d36bc663e57 --- /dev/null +++ b/benchmark/test/reference/preconditioner_dcomplex.simple.stderr @@ -0,0 +1,7 @@ +Running on ReferenceExecutor +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +Running with preconditioners: none +Running test case stencil(100, 7pt) +Matrix is of size (125, 125), 725 + Running preconditioner: none diff --git a/benchmark/test/reference/preconditioner_dcomplex.simple.stdout b/benchmark/test/reference/preconditioner_dcomplex.simple.stdout new file mode 100644 index 00000000000..ed567dcbb13 --- /dev/null +++ b/benchmark/test/reference/preconditioner_dcomplex.simple.stdout @@ -0,0 +1,32 @@ +[ + { + "size": 100, + "stencil": "7pt", + "preconditioner": { + "none": { + "generate": { + "components": { + "generate()": 1.0, + "overhead": 1.0 + }, + "time": 1.0, + "repetitions": 10 + }, + "apply": { + "components": { + "apply()": 1.0, + "copy()": 1.0, + "dense::copy": 1.0, + "overhead": 1.0 + }, + "time": 1.0, + "repetitions": 10 + }, + "completed": true + } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 + } +] diff --git a/benchmark/test/reference/solver_dcomplex.simple.stderr b/benchmark/test/reference/solver_dcomplex.simple.stderr new file mode 100644 index 00000000000..6baa84ee792 --- /dev/null +++ b/benchmark/test/reference/solver_dcomplex.simple.stderr @@ -0,0 +1,8 @@ +Running on ReferenceExecutor +Running with 2 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +Running cg with 1000 iterations and residual goal of 1.000000e-06 +The number of right hand sides is 1 +Running test case stencil(100, 7pt) +Matrix is of size (125, 125) + Running solver: cg diff --git a/benchmark/test/reference/solver_dcomplex.simple.stdout b/benchmark/test/reference/solver_dcomplex.simple.stdout new file mode 100644 index 00000000000..0ee0e4b9a4b --- /dev/null +++ b/benchmark/test/reference/solver_dcomplex.simple.stdout @@ -0,0 +1,56 @@ +[ + { + "size": 100, + "stencil": "7pt", + "optimal": { + "spmv": "csr" + }, + "solver": { + "cg": { + "recurrent_residuals": [], + "true_residuals": [], + "implicit_residuals": [], + "iteration_timestamps": [], + "rhs_norm": 1.0, + "generate": { + "components": { + "generate()": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "time": 1.0 + }, + "apply": { + "components": { + "apply()": 1.0, + "iteration": 1.0, + "allocate": 1.0, + "dense::fill": 1.0, + "cg::initialize": 1.0, + "advanced_apply()": 1.0, + "csr::advanced_spmv": 1.0, + "dense::compute_norm2_dispatch": 1.0, + "copy()": 1.0, + "dense::copy": 1.0, + "dense::compute_conj_dot_dispatch": 1.0, + "check()": 1.0, + "residual_norm::residual_norm": 1.0, + "cg::step_1": 1.0, + "csr::spmv": 1.0, + "cg::step_2": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "iterations": 7, + "time": 1.0 + }, + "preconditioner": {}, + "residual_norm": 1.0, + "repetitions": 1, + "completed": true + } + }, + "rows": 125, + "cols": 125 + } +] diff --git a/benchmark/test/reference/sparse_blas_dcomplex.simple.stderr b/benchmark/test/reference/sparse_blas_dcomplex.simple.stderr new file mode 100644 index 00000000000..d4e29cd9cd7 --- /dev/null +++ b/benchmark/test/reference/sparse_blas_dcomplex.simple.stderr @@ -0,0 +1,7 @@ +Running on ReferenceExecutor +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The operations are transpose +Running test case stencil(100, 7pt) +Matrix is of size (125, 125), 725 + Running sparse_blas: transpose diff --git a/benchmark/test/reference/sparse_blas_dcomplex.simple.stdout b/benchmark/test/reference/sparse_blas_dcomplex.simple.stdout new file mode 100644 index 00000000000..a44f4f189b2 --- /dev/null +++ b/benchmark/test/reference/sparse_blas_dcomplex.simple.stdout @@ -0,0 +1,25 @@ +[ + { + "size": 100, + "stencil": "7pt", + "sparse_blas": { + "transpose": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "components": { + "allocate": 1.0, + "components::fill_array": 1.0, + "csr::transpose": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "completed": true + } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 + } +] diff --git a/benchmark/test/reference/spmv_dcomplex.simple.stderr b/benchmark/test/reference/spmv_dcomplex.simple.stderr new file mode 100644 index 00000000000..a1f6a62e866 --- /dev/null +++ b/benchmark/test/reference/spmv_dcomplex.simple.stderr @@ -0,0 +1,8 @@ +Running on ReferenceExecutor +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The formats are coo +The number of right hand sides is 1 +Running test case stencil(100, 7pt) +Matrix is of size (125, 125), 725 + Running spmv: coo diff --git a/benchmark/test/reference/spmv_dcomplex.simple.stdout b/benchmark/test/reference/spmv_dcomplex.simple.stdout new file mode 100644 index 00000000000..ea0944bfd25 --- /dev/null +++ b/benchmark/test/reference/spmv_dcomplex.simple.stdout @@ -0,0 +1,21 @@ +[ + { + "size": 100, + "stencil": "7pt", + "spmv": { + "coo": { + "storage": 17400, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725, + "optimal": { + "spmv": "coo" + } + } +] diff --git a/benchmark/test/reference/spmv_distributed_dcomplex.simple.stderr b/benchmark/test/reference/spmv_distributed_dcomplex.simple.stderr new file mode 100644 index 00000000000..b3739ed8774 --- /dev/null +++ b/benchmark/test/reference/spmv_distributed_dcomplex.simple.stderr @@ -0,0 +1,8 @@ +Running on ReferenceExecutor +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The formats are [csr]x[csr] +The number of right hand sides is 1 +Running test case stencil(100, 7pt, stencil) +Matrix is of size (81, 81), 144 + Running spmv: csr-csr diff --git a/benchmark/test/reference/spmv_distributed_dcomplex.simple.stdout b/benchmark/test/reference/spmv_distributed_dcomplex.simple.stdout new file mode 100644 index 00000000000..779739d7d6c --- /dev/null +++ b/benchmark/test/reference/spmv_distributed_dcomplex.simple.stdout @@ -0,0 +1,22 @@ +[ + { + "size": 100, + "stencil": "7pt", + "comm_pattern": "stencil", + "spmv": { + "csr-csr": { + "storage": 9972, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + } + }, + "rows": 81, + "cols": 81, + "nonzeros": 144, + "optimal": { + "spmv": "csr-csr" + } + } +] From 4e6574d6bcb402902f09f7d8440a40c04e264dbf Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 11 Oct 2024 12:57:08 +0200 Subject: [PATCH 3/5] [bench] fix residual norm logger for complex --- benchmark/utils/loggers.hpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/benchmark/utils/loggers.hpp b/benchmark/utils/loggers.hpp index 65d086beecb..ec6f65413c3 100644 --- a/benchmark/utils/loggers.hpp +++ b/benchmark/utils/loggers.hpp @@ -156,14 +156,14 @@ struct ResidualLogger : gko::log::Logger { rec_res_norms->push_back( get_norm(gko::as>(residual_norm))); } else { - gko::detail::vector_dispatch( + gko::detail::vector_dispatch( residual, [&](const auto v_residual) { rec_res_norms->push_back(compute_norm2(v_residual)); }); } if (solution) { gko::detail::vector_dispatch< - rc_vtype>(solution, [&](auto v_solution) { + ValueType>(solution, [&](auto v_solution) { using concrete_type = std::remove_pointer_t>; true_res_norms->push_back(compute_residual_norm( @@ -174,7 +174,9 @@ struct ResidualLogger : gko::log::Logger { } if (implicit_sq_residual_norm) { implicit_res_norms->push_back(std::sqrt( - get_norm(gko::as>(implicit_sq_residual_norm)))); + get_norm(gko::as>(implicit_sq_residual_norm) + ->compute_absolute() + .get()))); has_implicit_res_norm = true; } else { implicit_res_norms->push_back(-1.0); From 50a4605cde6dd430cede1d90e17fe7f941753536 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 14 Oct 2024 09:18:39 +0200 Subject: [PATCH 4/5] fixup! [ci] add benchmark tests with complex types --- benchmark/test/test_framework.py.in | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index 9f2e5718c05..725f7f036c9 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -138,7 +138,10 @@ def compare_output_impl( use_complex: bool ): base_file = sys.argv[1] - file = base_file if not use_complex else f"{base_file}_dcomplex" + if base_file.endswith(".exe"): + file = base_file if not use_complex else base_file.replace(".exe", "_dcomplex.exe") + else: + file = base_file if not use_complex else f"{base_file}_dcomplex" args = [file] + args expected_stdout = str(sourcepath / "reference" / expected_stdout) expected_stderr = str(sourcepath / "reference" / expected_stderr) From 51e4fb7e14bb364201241d9fce1cc2556a42e3c3 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 23 Oct 2024 16:57:50 +0200 Subject: [PATCH 5/5] review updates: - fix formatting Co-authored-by: Yu-Hsiang M. Tsai <19565938+yhmtsai@users.noreply.github.com> --- benchmark/test/blas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/test/blas.py b/benchmark/test/blas.py index 98a775c4992..3ebce7d6444 100755 --- a/benchmark/test/blas.py +++ b/benchmark/test/blas.py @@ -37,4 +37,4 @@ expected_stdout="blas_dcomplex.simple.stdout", expected_stderr="blas_dcomplex.simple.stderr", use_complex=True -) \ No newline at end of file +)